From ffc1993bbf5c47a871c32df419f0018b853ed373 Mon Sep 17 00:00:00 2001 From: rlskoeser <rebecca.s.koeser@princeton.edu> Date: Thu, 20 Feb 2025 16:23:53 -0500 Subject: [PATCH 01/10] Add validation and type conversion to interval init --- src/undate/undate.py | 4 ++++ tests/test_converters/test_edtf.py | 8 ++++---- tests/test_undate.py | 12 ++++++++---- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/src/undate/undate.py b/src/undate/undate.py index 2008914..f2d5300 100644 --- a/src/undate/undate.py +++ b/src/undate/undate.py @@ -72,6 +72,10 @@ def __init__( label: Optional[str] = None, calendar: Optional[Union[str, Calendar]] = None, ): + # everything is optional but something is required + if all([val is None for val in [year, month, day]]): + raise ValueError("At least one of year, month, or day must be specified") + # keep track of initial values and which values are known # TODO: add validation: if str, must be expected length self.initial_values: Dict[str, Optional[Union[int, str]]] = { diff --git a/tests/test_converters/test_edtf.py b/tests/test_converters/test_edtf.py index 5210e98..5c98446 100644 --- a/tests/test_converters/test_edtf.py +++ b/tests/test_converters/test_edtf.py @@ -64,8 +64,8 @@ def test_to_string(self): # if converter can't generate a string for the date, # it should return a value error - empty_undate = Undate() - empty_undate.precision = DatePrecision.DECADE - with pytest.raises(ValueError): - EDTFDateConverter().to_string(empty_undate) + # empty_undate = Undate() # undate with no date information no longer supported + # empty_undate.precision = DatePrecision.DECADE + # with pytest.raises(ValueError): + # EDTFDateConverter().to_string(empty_undate) # TODO: override missing digit and confirm replacement diff --git a/tests/test_undate.py b/tests/test_undate.py index 8f8a5c8..46fe973 100644 --- a/tests/test_undate.py +++ b/tests/test_undate.py @@ -132,7 +132,10 @@ def test_calendar(self): def test_init_invalid(self): with 
pytest.raises(ValueError): - Undate("19xx") + Undate("19??") + + with pytest.raises(ValueError, match="At least one of year, month, or day"): + Undate() def test_invalid_date(self): # invalid month should raise an error @@ -156,10 +159,11 @@ def test_year_property(self): # unset year assert Undate(month=12, day=31).year == "XXXX" + # NOTE: no longer supported to initialize undate with no date information # force method to hit conditional for date precision - some_century = Undate() - some_century.precision = DatePrecision.CENTURY - assert some_century.year is None + # some_century = Undate() + # some_century.precision = DatePrecision.CENTURY + # assert some_century.year is None def test_month_property(self): # one, two digit month From 710c66a9a50c16d015bb412eceea29162b837865 Mon Sep 17 00:00:00 2001 From: rlskoeser <rebecca.s.koeser@princeton.edu> Date: Thu, 20 Feb 2025 17:03:31 -0500 Subject: [PATCH 02/10] Implement & test an intersection method for UndateInterval --- src/undate/interval.py | 24 ++++++++++++++++++++++++ tests/test_interval.py | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) diff --git a/src/undate/interval.py b/src/undate/interval.py index 33ec200..eb91297 100644 --- a/src/undate/interval.py +++ b/src/undate/interval.py @@ -122,3 +122,27 @@ def duration(self) -> Timedelta: # is there any meaningful way to calculate duration # if one year is known and the other is not? raise NotImplementedError + + def intersection(self, other: "UndateInterval") -> Optional["UndateInterval"]: + """Determine the intersection or overlap between two :class:`UndateInterval` + objects and return a new interval, or None if no overlap. 
+ """ + try: + # when both values are defined, return the inner bounds; + # if not, return whichever is not None, or None + earliest = ( + max(self.earliest, other.earliest) + if self.earliest and other.earliest + else self.earliest or other.earliest + ) + latest = ( + min(self.latest, other.latest) + if self.latest and other.latest + else self.latest or other.latest + ) + + # if this results in an invalid interval, initialization + # will throw an exception + return UndateInterval(earliest, latest) + except ValueError: + return None diff --git a/tests/test_interval.py b/tests/test_interval.py index dea8710..3d49179 100644 --- a/tests/test_interval.py +++ b/tests/test_interval.py @@ -143,3 +143,35 @@ def test_duration(self): # one year set and the other not currently raises not implemented error with pytest.raises(NotImplementedError): UndateInterval(Undate(2000), Undate(month=10)).duration() + + def test_intersection(self): + century11th = UndateInterval(Undate(1001), Undate(1100)) + century20th = UndateInterval(Undate(1901), Undate(2000)) + # no intersection + assert century11th.intersection(century20th) is None + # should work in either direction + assert century20th.intersection(century11th) is None + + decade1990s = UndateInterval(Undate(1990), Undate(1999)) + # intersection of an interval completely contained in another + # returns an interval equivalent to the smaller one + assert century20th.intersection(decade1990s) == decade1990s + assert decade1990s.intersection(century20th) == decade1990s + + # partial overlap + nineties_oughts = UndateInterval(Undate(1990), Undate(2009)) + assert century20th.intersection(nineties_oughts) == UndateInterval( + Undate(1990), Undate(2000) + ) + + # intersections between half open intervals + after_c11th = UndateInterval(Undate(1001), None) + assert after_c11th.intersection(century20th) == century20th + assert after_c11th.intersection(decade1990s) == decade1990s + + before_20th = UndateInterval(None, Undate(1901)) + assert 
before_20th.intersection(decade1990s) is None + assert before_20th.intersection(century11th) == century11th + assert before_20th.intersection(after_c11th) == UndateInterval( + Undate(1001), Undate(1901) + ) From 298bb19ba8911dd98a1f4b804750fa30f56196db Mon Sep 17 00:00:00 2001 From: rlskoeser <rebecca.s.koeser@princeton.edu> Date: Fri, 21 Feb 2025 11:38:48 -0500 Subject: [PATCH 03/10] Make conversion to undate more reusable and extensible --- src/undate/interval.py | 23 +++++++++------------ src/undate/undate.py | 45 +++++++++++++++++++++++++++--------------- tests/test_undate.py | 11 ++++++++--- 3 files changed, 46 insertions(+), 33 deletions(-) diff --git a/src/undate/interval.py b/src/undate/interval.py index eb91297..33c3046 100644 --- a/src/undate/interval.py +++ b/src/undate/interval.py @@ -1,5 +1,3 @@ -import datetime - # Pre 3.10 requires Union for multiple types, e.g. Union[int, None] instead of int | None from typing import Optional, Union @@ -34,21 +32,18 @@ def __init__( latest: Optional[Undate] = None, label: Optional[str] = None, ): - # for now, assume takes two undate objects; - # support conversion from datetime - if earliest and not isinstance(earliest, Undate): - # NOTE: some overlap with Undate._comparison_type method - # maybe support conversion from other formats later - if isinstance(earliest, datetime.date): - earliest = Undate.from_datetime_date(earliest) - else: + # takes two undate objects; allows conversion from supported types + if earliest: + try: + earliest = Undate.to_undate(earliest) + except TypeError: raise ValueError( f"earliest date {earliest} cannot be converted to Undate" ) - if latest and not isinstance(latest, Undate): - if isinstance(latest, datetime.date): - latest = Undate.from_datetime_date(latest) - else: + if latest: + try: + latest = Undate.to_undate(latest) + except TypeError: raise ValueError(f"latest date {latest} cannot be converted to Undate") # check that the interval is valid diff --git a/src/undate/undate.py 
b/src/undate/undate.py index f2d5300..1b9671e 100644 --- a/src/undate/undate.py +++ b/src/undate/undate.py @@ -2,11 +2,13 @@ import datetime from enum import auto + import re from typing import TYPE_CHECKING if TYPE_CHECKING: from undate.interval import UndateInterval + try: # StrEnum was only added in python 3.11 from enum import StrEnum @@ -246,23 +248,19 @@ def format(self, format) -> str: raise ValueError(f"Unsupported format '{format}'") - def _comparison_type(self, other: object) -> "Undate": + @classmethod + def _comparison_type(cls, other: object) -> "Undate": """Common logic for type handling in comparison methods. Converts to Undate object if possible, otherwise raises - NotImplemented error. Currently only supports conversion - from :class:`datetime.date` + NotImplementedError exception. Uses :meth:`to_undate` for conversion. """ - - # support datetime.date by converting to undate - if isinstance(other, datetime.date): - other = Undate.from_datetime_date(other) - - # recommended to support comparison with arbitrary objects - if not isinstance(other, Undate): + # convert if possible; return NotImplemented if not + try: + return cls.to_undate(other) + except TypeError: + # recommended to support comparison with arbitrary objects return NotImplemented - return other - def __eq__(self, other: object) -> bool: # Note: assumes label differences don't matter for comparing dates @@ -272,6 +270,8 @@ def __eq__(self, other: object) -> bool: other = self._comparison_type(other) if other is NotImplemented: + # return NotImplemented to indicate comparison is not supported + # with this type return NotImplemented # if both dates are fully known, then earliest/latest check @@ -363,10 +363,23 @@ def __contains__(self, other: object) -> bool: ] ) - @staticmethod - def from_datetime_date(dt_date: datetime.date): - """Initialize an :class:`Undate` object from a :class:`datetime.date`""" - return Undate(dt_date.year, dt_date.month, dt_date.day) + @classmethod + def 
to_undate(cls, other: object) -> "Undate": + """Convert an arbitrary object to Undate, if possible. Raises TypeError + if conversion is not possible. + + Currently supports: + - :class:`datetime.date` or :class:`datetime.datetime` + + """ + match other: + case Undate(): + return other + case datetime.date() | datetime.datetime(): + return Undate(other.year, other.month, other.day) + + case _: + raise TypeError(f"Conversion from {type(other)} is not supported") @property def known_year(self) -> bool: diff --git a/tests/test_undate.py b/tests/test_undate.py index 46fe973..b3ba4fe 100644 --- a/tests/test_undate.py +++ b/tests/test_undate.py @@ -1,4 +1,4 @@ -from datetime import date +from datetime import date, datetime import pytest @@ -142,11 +142,16 @@ def test_invalid_date(self): with pytest.raises(ValueError): Undate(1990, 22) - def test_from_datetime_date(self): - undate_from_date = Undate.from_datetime_date(date(2001, 3, 5)) + def test_to_undate(self): + undate_from_date = Undate.to_undate(date(2001, 3, 5)) assert isinstance(undate_from_date, Undate) assert undate_from_date == Undate(2001, 3, 5) + now = datetime.now() + undate_from_dt = Undate.to_undate(now) + assert isinstance(undate_from_dt, Undate) + assert undate_from_dt == Undate(now.year, now.month, now.day) + # test properties for accessing parts of date def test_year_property(self): # two, three, four five digit years; numeric and string From fc4f7a92e693e5e068503bbd7ced47f989560a1e Mon Sep 17 00:00:00 2001 From: rlskoeser <rebecca.s.koeser@princeton.edu> Date: Fri, 21 Feb 2025 11:51:16 -0500 Subject: [PATCH 04/10] Drop support for python 3.9 so we can use match/case --- .github/workflows/unit_tests.yml | 2 +- pyproject.toml | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index 17a1c7a..381b231 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -20,7 +20,7 @@ jobs: runs-on: 
ubuntu-latest strategy: matrix: - python: ["3.9", "3.10", "3.11", "3.12", "3.13"] + python: ["3.10", "3.11", "3.12", "3.13"] defaults: run: working-directory: . diff --git a/pyproject.toml b/pyproject.toml index f1ad9a7..8bcf839 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ name = "undate" description = "library for working with uncertain, fuzzy, or partially unknown dates and date intervals" readme = "README.md" license = { text = "Apache-2" } -requires-python = ">= 3.9" +requires-python = ">= 3.10" dynamic = ["version"] dependencies = ["lark[interegular]", "numpy", "convertdate", "strenum; python_version < '3.11'"] authors = [ @@ -31,7 +31,6 @@ keywords = [ classifiers = [ "Development Status :: 2 - Pre-Alpha", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", From a4f2e7bd322d74b2db6ef4684b8ef25f9a0f7a86 Mon Sep 17 00:00:00 2001 From: rlskoeser <rebecca.s.koeser@princeton.edu> Date: Fri, 21 Feb 2025 12:01:53 -0500 Subject: [PATCH 05/10] Add more type checks and tests --- src/undate/interval.py | 3 +++ tests/test_interval.py | 6 ++++++ tests/test_undate.py | 4 ++++ 3 files changed, 13 insertions(+) diff --git a/src/undate/interval.py b/src/undate/interval.py index 33c3046..4472d67 100644 --- a/src/undate/interval.py +++ b/src/undate/interval.py @@ -73,6 +73,9 @@ def __repr__(self) -> str: return "<UndateInterval %s>" % self def __eq__(self, other) -> bool: + # currently doesn't support comparison with any other types + if not isinstance(other, UndateInterval): + return NotImplemented # consider interval equal if both dates are equal return self.earliest == other.earliest and self.latest == other.latest diff --git a/tests/test_interval.py b/tests/test_interval.py index 3d49179..254f3c7 100644 --- a/tests/test_interval.py +++ b/tests/test_interval.py @@ -82,6 +82,12 @@ def test_eq(self): ) 
assert UndateInterval(Undate(2022, 5)) == UndateInterval(Undate(2022, 5)) + def test_eq_type_check(self): + # doesn't currently support comparison with anything else + interval = UndateInterval(Undate(900)) + # returns NotIplemented if comparison with this type is not supported + assert interval.__eq__("foo") == NotImplemented + def test_not_eq(self): assert UndateInterval(Undate(2022), Undate(2023)) != UndateInterval( Undate(2022), Undate(2024) diff --git a/tests/test_undate.py b/tests/test_undate.py index b3ba4fe..a9087c2 100644 --- a/tests/test_undate.py +++ b/tests/test_undate.py @@ -152,6 +152,10 @@ def test_to_undate(self): assert isinstance(undate_from_dt, Undate) assert undate_from_dt == Undate(now.year, now.month, now.day) + # unsupported type + with pytest.raises(TypeError): + Undate.to_undate("foo") + # test properties for accessing parts of date def test_year_property(self): # two, three, four five digit years; numeric and string From b09c9fc5a354ccb9f4e172b76818a0630607ebb8 Mon Sep 17 00:00:00 2001 From: rlskoeser <rebecca.s.koeser@princeton.edu> Date: Wed, 5 Mar 2025 19:12:42 -0500 Subject: [PATCH 06/10] Remove unused import --- tests/test_converters/test_edtf.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_converters/test_edtf.py b/tests/test_converters/test_edtf.py index 5c98446..3262e46 100644 --- a/tests/test_converters/test_edtf.py +++ b/tests/test_converters/test_edtf.py @@ -1,6 +1,5 @@ import pytest from undate.converters.edtf import EDTFDateConverter -from undate.date import DatePrecision from undate import Undate, UndateInterval From f06960ae7273d5f4257680a2675f91db7ae92f73 Mon Sep 17 00:00:00 2001 From: rlskoeser <rebecca.s.koeser@princeton.edu> Date: Thu, 13 Mar 2025 16:11:06 -0400 Subject: [PATCH 07/10] Use raise from err on type error in interval init Based on @coderabbitai feedback --- src/undate/interval.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/undate/interval.py 
b/src/undate/interval.py index 4472d67..262bd5b 100644 --- a/src/undate/interval.py +++ b/src/undate/interval.py @@ -36,15 +36,17 @@ def __init__( if earliest: try: earliest = Undate.to_undate(earliest) - except TypeError: + except TypeError as err: raise ValueError( f"earliest date {earliest} cannot be converted to Undate" - ) + ) from err if latest: try: latest = Undate.to_undate(latest) - except TypeError: - raise ValueError(f"latest date {latest} cannot be converted to Undate") + except TypeError as err: + raise ValueError( + f"latest date {latest} cannot be converted to Undate" + ) from err # check that the interval is valid if latest and earliest and latest <= earliest: @@ -123,7 +125,7 @@ def duration(self) -> Timedelta: def intersection(self, other: "UndateInterval") -> Optional["UndateInterval"]: """Determine the intersection or overlap between two :class:`UndateInterval` - objects and return a new interval, or None if no overlap. + objects and return a new interval. Returns None if there is no overlap. 
""" try: # when both values are defined, return the inner bounds; From 9ee14ef2ed46fc213d5cdf14987da450a8df2381 Mon Sep 17 00:00:00 2001 From: rlskoeser <rebecca.s.koeser@princeton.edu> Date: Thu, 13 Mar 2025 16:48:03 -0400 Subject: [PATCH 08/10] Add and test contains/in method for interval --- src/undate/interval.py | 39 ++++++++++++++++++++++++++++++++-- tests/test_interval.py | 48 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+), 2 deletions(-) diff --git a/src/undate/interval.py b/src/undate/interval.py index 262bd5b..8e6cd2f 100644 --- a/src/undate/interval.py +++ b/src/undate/interval.py @@ -23,8 +23,8 @@ class UndateInterval: latest: Union[Undate, None] label: Union[str, None] - # TODO: let's think about adding an optional precision / length /size field - # using DatePrecision + # TODO: think about adding an optional precision / length /size field + # using DatePrecision for intervals of any standard duration (decade, century) def __init__( self, @@ -123,6 +123,41 @@ def duration(self) -> Timedelta: # if one year is known and the other is not? raise NotImplementedError + def __contains__(self, other: object) -> bool: + """Determine if another interval or date falls within this + interval.""" + # support comparison with another interval + if isinstance(other, UndateInterval): + # if two intervals are strictly equal, don't consider + # either one as containing the other + if self == other: + return False + # otherwise compare based on earliest/latest bounds + other_earliest = other.earliest + other_latest = other.latest + else: + # otherwise, try to convert to an Undate + try: + other = Undate.to_undate(other) + other_latest = other_earliest = other + except TypeError: + # if conversion fails, then we don't support comparison + raise + + # if either bound of the current interval is None, + # then it is an open interval and we don't need to check the other value. 
+ # if the other value is set, then check that it falls within the + # bounds of this interval + return ( + self.earliest is None + or other_earliest is not None + and other_earliest >= self.earliest + ) and ( + self.latest is None + or other_latest is not None + and other_latest <= self.latest + ) + def intersection(self, other: "UndateInterval") -> Optional["UndateInterval"]: """Determine the intersection or overlap between two :class:`UndateInterval` objects and return a new interval. Returns None if there is no overlap. diff --git a/tests/test_interval.py b/tests/test_interval.py index 254f3c7..4552c05 100644 --- a/tests/test_interval.py +++ b/tests/test_interval.py @@ -181,3 +181,51 @@ def test_intersection(self): assert before_20th.intersection(after_c11th) == UndateInterval( Undate(1001), Undate(1901) ) + + def test_contains(self): + century11th = UndateInterval(Undate(1001), Undate(1100)) + century20th = UndateInterval(Undate(1901), Undate(2000)) + decade1990s = UndateInterval(Undate(1990), Undate(1999)) + # an interval doesn't contain itself + for interval in [century11th, century20th, decade1990s]: + assert interval not in interval + + # checking if an interval is within another interval + assert decade1990s in century20th + assert decade1990s not in century11th + assert century11th not in decade1990s + assert century20th not in decade1990s + # a specific date can be contained by an interval + y2k = Undate(2000) + assert y2k in century20th + assert y2k not in century11th + # partially known date should work too + april_someyear = Undate("198X", 4) + assert april_someyear in century20th + assert april_someyear not in century11th + # conversion from datetime.date also works + assert datetime.date(1922, 5, 1) in century20th + # unsupported types result in a type error + with pytest.raises(TypeError): + "nineteen-eighty-four" in century20th + + # contains check with half-open intervals + after_c11th = UndateInterval(Undate(1001), None) + before_20th = 
UndateInterval(None, Undate(1901)) + # neither of them contains the other + assert after_c11th not in before_20th + assert before_20th not in after_c11th + # nor are they contained by a smaller range + assert after_c11th not in decade1990s + assert before_20th not in decade1990s + + # all of our previous test dates are in the 1900s, + # so they are after the 11th century and not before the 20th + for period in [decade1990s, y2k, april_someyear]: + assert period in after_c11th + assert period not in before_20th + + # fully open interval - is this even meaningful? + whenever = UndateInterval(None, None) + assert decade1990s in whenever + assert whenever not in whenever From 700c8348ef9f6e377ce1dab510fdfb2a82105421 Mon Sep 17 00:00:00 2001 From: rlskoeser <rebecca.s.koeser@princeton.edu> Date: Thu, 13 Mar 2025 18:06:20 -0400 Subject: [PATCH 09/10] Address nitpicks flagged by @coderabbitai --- tests/test_interval.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_interval.py b/tests/test_interval.py index 4552c05..40713b1 100644 --- a/tests/test_interval.py +++ b/tests/test_interval.py @@ -85,7 +85,7 @@ def test_eq(self): def test_eq_type_check(self): # doesn't currently support comparison with anything else interval = UndateInterval(Undate(900)) - # returns NotIplemented if comparison with this type is not supported + # returns NotImplemented if comparison with this type is not supported assert interval.__eq__("foo") == NotImplemented def test_not_eq(self): @@ -207,7 +207,7 @@ def test_contains(self): assert datetime.date(1922, 5, 1) in century20th # unsupported types result in a type error with pytest.raises(TypeError): - "nineteen-eighty-four" in century20th + assert "nineteen-eighty-four" in century20th # contains check with half-open intervals after_c11th = UndateInterval(Undate(1001), None) From cfdef424b008bbdd73c6eaf70f63f4f53bf726a0 Mon Sep 17 00:00:00 2001 From: rlskoeser <rebecca.s.koeser@princeton.edu> Date: Sat, 5 Apr 2025 
14:32:10 -0400 Subject: [PATCH 10/10] Revise contains logic: interval contains itself or equivalent interval --- src/undate/interval.py | 12 +++++------- tests/test_interval.py | 8 +++++--- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/undate/interval.py b/src/undate/interval.py index 8e6cd2f..96950cf 100644 --- a/src/undate/interval.py +++ b/src/undate/interval.py @@ -125,14 +125,12 @@ def duration(self) -> Timedelta: def __contains__(self, other: object) -> bool: """Determine if another interval or date falls within this - interval.""" - # support comparison with another interval + interval. Supports comparison with :class:`UndateInterval` + or anything that can be converted with :meth:`Undate.to_undate`.""" + # support comparison with another interval or anything + # that can be converted to an Undate if isinstance(other, UndateInterval): - # if two intervals are strictly equal, don't consider - # either one as containing the other - if self == other: - return False - # otherwise compare based on earliest/latest bounds + # compare based on earliest/latest bounds other_earliest = other.earliest other_latest = other.latest else: diff --git a/tests/test_interval.py b/tests/test_interval.py index 40713b1..3101b2d 100644 --- a/tests/test_interval.py +++ b/tests/test_interval.py @@ -186,9 +186,9 @@ def test_contains(self): century11th = UndateInterval(Undate(1001), Undate(1100)) century20th = UndateInterval(Undate(1901), Undate(2000)) decade1990s = UndateInterval(Undate(1990), Undate(1999)) - # an interval doesn't contain itself + # an interval DOES contain itself for interval in [century11th, century20th, decade1990s]: - assert interval not in interval + assert interval in interval # checking if an interval is within another interval assert decade1990s in century20th @@ -228,4 +228,6 @@ def test_contains(self): # fully open interval - is this even meaningful? 
whenever = UndateInterval(None, None) assert decade1990s in whenever - assert whenever not in whenever + # NOTE: an interval contains itself or an equivalent interval, + # but that may not make sense for open intervals... + assert whenever in whenever