diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index 17a1c7a..381b231 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -20,7 +20,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python: ["3.9", "3.10", "3.11", "3.12", "3.13"] + python: ["3.10", "3.11", "3.12", "3.13"] defaults: run: working-directory: . diff --git a/pyproject.toml b/pyproject.toml index f1ad9a7..8bcf839 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ name = "undate" description = "library for working with uncertain, fuzzy, or partially unknown dates and date intervals" readme = "README.md" license = { text = "Apache-2" } -requires-python = ">= 3.9" +requires-python = ">= 3.10" dynamic = ["version"] dependencies = ["lark[interegular]", "numpy", "convertdate", "strenum; python_version < '3.11'"] authors = [ @@ -31,7 +31,6 @@ keywords = [ classifiers = [ "Development Status :: 2 - Pre-Alpha", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", diff --git a/src/undate/interval.py b/src/undate/interval.py index 33ec200..96950cf 100644 --- a/src/undate/interval.py +++ b/src/undate/interval.py @@ -1,5 +1,3 @@ -import datetime - # Pre 3.10 requires Union for multiple types, e.g. 
Union[int, None] instead of int | None from typing import Optional, Union @@ -25,8 +23,8 @@ class UndateInterval: latest: Union[Undate, None] label: Union[str, None] - # TODO: let's think about adding an optional precision / length /size field - # using DatePrecision + # TODO: think about adding an optional precision / length /size field + # using DatePrecision for intervals of any standard duration (decade, century) def __init__( self, @@ -34,22 +32,21 @@ def __init__( latest: Optional[Undate] = None, label: Optional[str] = None, ): - # for now, assume takes two undate objects; - # support conversion from datetime - if earliest and not isinstance(earliest, Undate): - # NOTE: some overlap with Undate._comparison_type method - # maybe support conversion from other formats later - if isinstance(earliest, datetime.date): - earliest = Undate.from_datetime_date(earliest) - else: + # takes two undate objects; allows conversion from supported types + if earliest: + try: + earliest = Undate.to_undate(earliest) + except TypeError as err: raise ValueError( f"earliest date {earliest} cannot be converted to Undate" - ) - if latest and not isinstance(latest, Undate): - if isinstance(latest, datetime.date): - latest = Undate.from_datetime_date(latest) - else: - raise ValueError(f"latest date {latest} cannot be converted to Undate") + ) from err + if latest: + try: + latest = Undate.to_undate(latest) + except TypeError as err: + raise ValueError( + f"latest date {latest} cannot be converted to Undate" + ) from err # check that the interval is valid if latest and earliest and latest <= earliest: @@ -78,6 +75,9 @@ def __repr__(self) -> str: return "" % self def __eq__(self, other) -> bool: + # currently doesn't support comparison with any other types + if not isinstance(other, UndateInterval): + return NotImplemented # consider interval equal if both dates are equal return self.earliest == other.earliest and self.latest == other.latest @@ -122,3 +122,60 @@ def duration(self) -> 
Timedelta: # is there any meaningful way to calculate duration # if one year is known and the other is not? raise NotImplementedError + + def __contains__(self, other: object) -> bool: + """Determine if another interval or date falls within this + interval. Supports comparison with :class:`UndateInterval` + or anything that can be converted with :meth:`Undate.to_undate`.""" + # support comparison with another interval or anything + # that can be converted to an Undate + if isinstance(other, UndateInterval): + # compare based on earliest/latest bounds + other_earliest = other.earliest + other_latest = other.latest + else: + # otherwise, try to convert to an Undate + try: + other = Undate.to_undate(other) + other_latest = other_earliest = other + except TypeError: + # if conversion fails, then we don't support comparison + raise + + # if either bound of the current interval is None, + # then it is an open interval and we don't need to check the other value. + # if the other value is set, then check that it falls within the + # bounds of this interval + return ( + self.earliest is None + or other_earliest is not None + and other_earliest >= self.earliest + ) and ( + self.latest is None + or other_latest is not None + and other_latest <= self.latest + ) + + def intersection(self, other: "UndateInterval") -> Optional["UndateInterval"]: + """Determine the intersection or overlap between two :class:`UndateInterval` + objects and return a new interval. Returns None if there is no overlap. 
+ """ + try: + # when both values are defined, return the inner bounds; + # if not, return whichever is not None, or None + earliest = ( + max(self.earliest, other.earliest) + if self.earliest and other.earliest + else self.earliest or other.earliest + ) + latest = ( + min(self.latest, other.latest) + if self.latest and other.latest + else self.latest or other.latest + ) + + # if this results in an invalid interval, initialization + # will throw an exception + return UndateInterval(earliest, latest) + except ValueError: + return None diff --git a/src/undate/undate.py b/src/undate/undate.py index 2008914..1b9671e 100644 --- a/src/undate/undate.py +++ b/src/undate/undate.py @@ -2,11 +2,13 @@ import datetime from enum import auto + import re from typing import TYPE_CHECKING if TYPE_CHECKING: from undate.interval import UndateInterval + try: # StrEnum was only added in python 3.11 from enum import StrEnum @@ -72,6 +74,10 @@ def __init__( label: Optional[str] = None, calendar: Optional[Union[str, Calendar]] = None, ): + # everything is optional but something is required + if all([val is None for val in [year, month, day]]): + raise ValueError("At least one of year, month, or day must be specified") + # keep track of initial values and which values are known # TODO: add validation: if str, must be expected length self.initial_values: Dict[str, Optional[Union[int, str]]] = { @@ -242,23 +248,19 @@ def format(self, format) -> str: raise ValueError(f"Unsupported format '{format}'") - def _comparison_type(self, other: object) -> "Undate": + @classmethod + def _comparison_type(cls, other: object) -> "Undate": """Common logic for type handling in comparison methods. - Converts to Undate object if possible, otherwise raises - NotImplemented error. Currently only supports conversion - from :class:`datetime.date` + Converts to Undate object if possible, otherwise returns + NotImplemented. Uses :meth:`to_undate` for conversion.
""" - - # support datetime.date by converting to undate - if isinstance(other, datetime.date): - other = Undate.from_datetime_date(other) - - # recommended to support comparison with arbitrary objects - if not isinstance(other, Undate): + # convert if possible; return NotImplemented if not + try: + return cls.to_undate(other) + except TypeError: + # recommended to support comparison with arbitrary objects return NotImplemented - return other - def __eq__(self, other: object) -> bool: # Note: assumes label differences don't matter for comparing dates @@ -268,6 +270,8 @@ def __eq__(self, other: object) -> bool: other = self._comparison_type(other) if other is NotImplemented: + # return NotImplemented to indicate comparison is not supported + # with this type return NotImplemented # if both dates are fully known, then earliest/latest check @@ -359,10 +363,23 @@ def __contains__(self, other: object) -> bool: ] ) - @staticmethod - def from_datetime_date(dt_date: datetime.date): - """Initialize an :class:`Undate` object from a :class:`datetime.date`""" - return Undate(dt_date.year, dt_date.month, dt_date.day) + @classmethod + def to_undate(cls, other: object) -> "Undate": + """Convert an arbitrary object to Undate, if possible. Raises TypeError + if conversion is not possible.
+ + Currently supports: + - :class:`datetime.date` or :class:`datetime.datetime` + + """ + match other: + case Undate(): + return other + case datetime.date() | datetime.datetime(): + return Undate(other.year, other.month, other.day) + + case _: + raise TypeError(f"Conversion from {type(other)} is not supported") @property def known_year(self) -> bool: diff --git a/tests/test_converters/test_edtf.py b/tests/test_converters/test_edtf.py index 5210e98..3262e46 100644 --- a/tests/test_converters/test_edtf.py +++ b/tests/test_converters/test_edtf.py @@ -1,6 +1,5 @@ import pytest from undate.converters.edtf import EDTFDateConverter -from undate.date import DatePrecision from undate import Undate, UndateInterval @@ -64,8 +63,8 @@ def test_to_string(self): # if converter can't generate a string for the date, # it should return a value error - empty_undate = Undate() - empty_undate.precision = DatePrecision.DECADE - with pytest.raises(ValueError): - EDTFDateConverter().to_string(empty_undate) + # empty_undate = Undate() # undate with no date information no longer supported + # empty_undate.precision = DatePrecision.DECADE + # with pytest.raises(ValueError): + # EDTFDateConverter().to_string(empty_undate) # TODO: override missing digit and confirm replacement diff --git a/tests/test_interval.py b/tests/test_interval.py index dea8710..3101b2d 100644 --- a/tests/test_interval.py +++ b/tests/test_interval.py @@ -82,6 +82,12 @@ def test_eq(self): ) assert UndateInterval(Undate(2022, 5)) == UndateInterval(Undate(2022, 5)) + def test_eq_type_check(self): + # doesn't currently support comparison with anything else + interval = UndateInterval(Undate(900)) + # returns NotImplemented if comparison with this type is not supported + assert interval.__eq__("foo") == NotImplemented + def test_not_eq(self): assert UndateInterval(Undate(2022), Undate(2023)) != UndateInterval( Undate(2022), Undate(2024) @@ -143,3 +149,85 @@ def test_duration(self): # one year set and the other not
currently raises not implemented error with pytest.raises(NotImplementedError): UndateInterval(Undate(2000), Undate(month=10)).duration() + + def test_intersection(self): + century11th = UndateInterval(Undate(1001), Undate(1100)) + century20th = UndateInterval(Undate(1901), Undate(2000)) + # no intersection + assert century11th.intersection(century20th) is None + # should work in either direction + assert century20th.intersection(century11th) is None + + decade1990s = UndateInterval(Undate(1990), Undate(1999)) + # intersection of an interval completely contained in another + # returns an interval equivalent to the smaller one + assert century20th.intersection(decade1990s) == decade1990s + assert decade1990s.intersection(century20th) == decade1990s + + # partial overlap + nineties_oughts = UndateInterval(Undate(1990), Undate(2009)) + assert century20th.intersection(nineties_oughts) == UndateInterval( + Undate(1990), Undate(2000) + ) + + # intersections between half open intervals + after_c11th = UndateInterval(Undate(1001), None) + assert after_c11th.intersection(century20th) == century20th + assert after_c11th.intersection(decade1990s) == decade1990s + + before_20th = UndateInterval(None, Undate(1901)) + assert before_20th.intersection(decade1990s) is None + assert before_20th.intersection(century11th) == century11th + assert before_20th.intersection(after_c11th) == UndateInterval( + Undate(1001), Undate(1901) + ) + + def test_contains(self): + century11th = UndateInterval(Undate(1001), Undate(1100)) + century20th = UndateInterval(Undate(1901), Undate(2000)) + decade1990s = UndateInterval(Undate(1990), Undate(1999)) + # an interval DOES contain itself + for interval in [century11th, century20th, decade1990s]: + assert interval in interval + + # checking if an interval is within another interval + assert decade1990s in century20th + assert decade1990s not in century11th + assert century11th not in decade1990s + assert century20th not in decade1990s + # a specific 
date can be contained by an interval + y2k = Undate(2000) + assert y2k in century20th + assert y2k not in century11th + # partially known date should work too + april_someyear = Undate("198X", 4) + assert april_someyear in century20th + assert april_someyear not in century11th + # conversion from datetime.date also works + assert datetime.date(1922, 5, 1) in century20th + # unsupported types result in a type error + with pytest.raises(TypeError): + assert "nineteen-eighty-four" in century20th + + # contains check with half-open intervals + after_c11th = UndateInterval(Undate(1001), None) + before_20th = UndateInterval(None, Undate(1901)) + # neither of them contains the other + assert after_c11th not in before_20th + assert before_20th not in after_c11th + # nor are they contained by a smaller range + assert after_c11th not in decade1990s + assert before_20th not in decade1990s + + # all of our previous test dates are in the 1900s, + # so they are after the 11th century and not before the 20th + for period in [decade1990s, y2k, april_someyear]: + assert period in after_c11th + assert period not in before_20th + + # fully open interval - is this even meaningful? + whenever = UndateInterval(None, None) + assert decade1990s in whenever + # NOTE: an interval contains itself or an equivalent interval, + # but that may not make sense for open intervals... 
+ assert whenever in whenever diff --git a/tests/test_undate.py b/tests/test_undate.py index 8f8a5c8..a9087c2 100644 --- a/tests/test_undate.py +++ b/tests/test_undate.py @@ -1,4 +1,4 @@ -from datetime import date +from datetime import date, datetime import pytest @@ -132,18 +132,30 @@ def test_calendar(self): def test_init_invalid(self): with pytest.raises(ValueError): - Undate("19xx") + Undate("19??") + + with pytest.raises(ValueError, match="At least one of year, month, or day"): + Undate() def test_invalid_date(self): # invalid month should raise an error with pytest.raises(ValueError): Undate(1990, 22) - def test_from_datetime_date(self): - undate_from_date = Undate.from_datetime_date(date(2001, 3, 5)) + def test_to_undate(self): + undate_from_date = Undate.to_undate(date(2001, 3, 5)) assert isinstance(undate_from_date, Undate) assert undate_from_date == Undate(2001, 3, 5) + now = datetime.now() + undate_from_dt = Undate.to_undate(now) + assert isinstance(undate_from_dt, Undate) + assert undate_from_dt == Undate(now.year, now.month, now.day) + + # unsupported type + with pytest.raises(TypeError): + Undate.to_undate("foo") + # test properties for accessing parts of date def test_year_property(self): # two, three, four five digit years; numeric and string @@ -156,10 +168,11 @@ def test_year_property(self): # unset year assert Undate(month=12, day=31).year == "XXXX" + # NOTE: initializing an undate with no date information is no longer supported # force method to hit conditional for date precision - some_century = Undate() - some_century.precision = DatePrecision.CENTURY - assert some_century.year is None + # some_century = Undate() + # some_century.precision = DatePrecision.CENTURY + # assert some_century.year is None def test_month_property(self): # one, two digit month