From 5b5746d70337b5a62aa26c26f3fe934ee50c3b43 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Wed, 25 Jun 2025 14:21:50 -0400 Subject: [PATCH 01/12] Add duration logic for uncertain years --- src/undate/converters/base.py | 4 + .../converters/calendars/hebrew/converter.py | 4 + src/undate/undate.py | 91 ++++++++++++------- tests/test_undate.py | 14 +++ 4 files changed, 80 insertions(+), 33 deletions(-) diff --git a/src/undate/converters/base.py b/src/undate/converters/base.py index 04db129..9cb0b59 100644 --- a/src/undate/converters/base.py +++ b/src/undate/converters/base.py @@ -162,6 +162,10 @@ def max_day(self, year: int, month: int) -> int: """maximum numeric day for the specified year and month in this calendar""" raise NotImplementedError + def days_in_year(self, year: int) -> int: + """number of days in the specified year in this calendar""" + raise NotImplementedError + def to_gregorian(self, year, month, day) -> tuple[int, int, int]: """Convert a date for this calendar specified by numeric year, month, and day, into the Gregorian equivalent date. Should return a tuple of year, month, day. diff --git a/src/undate/converters/calendars/hebrew/converter.py b/src/undate/converters/calendars/hebrew/converter.py index d540021..de7c1ac 100644 --- a/src/undate/converters/calendars/hebrew/converter.py +++ b/src/undate/converters/calendars/hebrew/converter.py @@ -47,6 +47,10 @@ def max_day(self, year: int, month: int) -> int: # NOTE: unreleased v2.4.1 of convertdate standardizes month_days to month_length return hebrew.month_days(year, month) + def days_in_year(self, year: int) -> int: + """the number of days in the specified year for this calendar""" + return int(hebrew.year_days(year)) + def to_gregorian(self, year: int, month: int, day: int) -> tuple[int, int, int]: """Convert a Hebrew date, specified by year, month, and day, to the Gregorian equivalent date. Returns a tuple of year, month, day. 
diff --git a/src/undate/undate.py b/src/undate/undate.py index e2068e1..9732584 100644 --- a/src/undate/undate.py +++ b/src/undate/undate.py @@ -126,6 +126,7 @@ def calculate_earliest_latest(self, year, month, day): else: # use the configured min/max allowable years if we # don't have any other bounds + # TODO: make calendar-specific? these are min/max for gregorian min_year = self.MIN_ALLOWABLE_YEAR max_year = self.MAX_ALLOWABLE_YEAR @@ -166,7 +167,7 @@ def calculate_earliest_latest(self, year, month, day): else: # if we have no day or partial day, calculate min / max min_day = 1 # is min day ever anything other than 1 ? - rel_year = year if year and isinstance(year, int) else None + rel_year = year if year and isinstance(year, int) else max_year # use month if it is an integer; otherwise use previusly determined # max month (which may not be 12 depending if partially unknown) rel_month = month if month and isinstance(month, int) else latest_month @@ -473,27 +474,29 @@ def duration(self) -> Timedelta | UnDelta: if self.precision == DatePrecision.DAY: return ONE_DAY + # if year is unknown or partially unknown, month and year duration both need to calculate for + # variant years (leap year, non-leap year), since length may vary + possible_years = [self.earliest.year] + if self.is_partially_known("year"): + # if year is partially known (e.g. 191X), get all possible years in range + # TODO: refactor into a function/property; combine/extract from missing digit min/max method + possible_years = [ + int(str(self.year).replace(self.MISSING_DIGIT, str(digit))) + for digit in range(0, 10) + ] + # TODO: once this is working, make more efficient by only getting representative years from the calendar + elif not self.known_year: # completely unknown year + # TODO: should leap-year specific logic shift to the calendars, + # since it works differently depending on the calendar? 
+ possible_years = [ + self.calendar_converter.LEAP_YEAR, + self.calendar_converter.NON_LEAP_YEAR, + ] + possible_max_days = None + # if precision is month and year is unknown, # calculate month duration within a single year (not min/max) if self.precision == DatePrecision.MONTH: - latest = self.latest - # if year is unknown, calculate month duration in - # leap year and non-leap year, in case length varies - if not self.known_year: - # TODO: should leap-year specific logic shift to the calendars, - # since it works differently depending on the calendar? - possible_years = [ - self.calendar_converter.LEAP_YEAR, - self.calendar_converter.NON_LEAP_YEAR, - ] - # TODO: handle partially known years like 191X, - # switch to representative years (depends on calendar) - # (to be implemented as part of ambiguous year duration) - else: - # otherwise, get possible durations for all possible months - # for a known year - possible_years = [self.earliest.year] - # for every possible month and year, get max days for that month, possible_max_days = set() # appease mypy, which says month values could be None here; @@ -506,23 +509,45 @@ def duration(self) -> Timedelta | UnDelta: self.calendar_converter.max_day(year, possible_month) ) - # if there is more than one possible value for month length, - # whether due to leap year / non-leap year or ambiguous month, - # return an uncertain delta - if len(possible_max_days) > 1: - return UnDelta(*possible_max_days) - - # otherwise, calculate timedelta normally based on maximum day - max_day = list(possible_max_days)[0] - latest = Date(self.earliest.year, self.earliest.month, max_day) + # if precision is year but year is unknown, return an uncertain delta + elif self.precision == DatePrecision.YEAR: + possible_max_days = set() + # this is currently hebrew-specific due to the way the start/end of year wraps for that calendar + try: + possible_max_days = set( + [self.calendar_converter.days_in_year(y) for y in possible_years] + ) + except 
NotImplementedError: + pass + + if not possible_max_days: + for year in possible_years: + # TODO: shift logic to calendars for parity with Hebrew? + year_start = Date( + *self.calendar_converter.to_gregorian( + year, self.calendar_converter.min_month(), 1 + ) + ) + last_month = self.calendar_converter.max_month(year) + year_end = Date( + *self.calendar_converter.to_gregorian( + year, + last_month, + self.calendar_converter.max_day(year, last_month), + ) + ) - return latest - self.earliest + ONE_DAY + year_days = (year_end - year_start).days + 1 + possible_max_days.add(year_days) - # TODO: handle year precision + unknown/partially known year - # (will be handled in separate branch) + # if there is more than one possible value for number of days + # due to range including lear year / non-leap year, return an uncertain delta + if possible_max_days and len(possible_max_days) > 1: + return UnDelta(*possible_max_days) + else: + return Timedelta(possible_max_days.pop()) - # otherwise, calculate based on earliest/latest range - # subtract earliest from latest and add a day to count start day + # otherwise, subtract earliest from latest and add a day to include start day in the count return self.latest - self.earliest + ONE_DAY def _missing_digit_minmax( diff --git a/tests/test_undate.py b/tests/test_undate.py index cf2b252..865fcb2 100644 --- a/tests/test_undate.py +++ b/tests/test_undate.py @@ -438,6 +438,20 @@ def test_partiallyknown_duration(self): assert isinstance(feb_duration, UnDelta) assert feb_duration.days == UnInt(28, 29) + def test_partiallyknownyear_duration(self): + assert Undate("190X").duration().days == UnInt(365, 366) + assert Undate("XXXX").duration().days == UnInt(365, 366) + # if possible years don't include any leap years, duration is not ambiguous + assert Undate("19X1").duration().days == 365 + # year duration logic should work in other calendars + # islamic + assert Undate("108X", calendar="Islamic").duration().days == UnInt(354, 355) + # NOTE: 
completely unknown years is not yet supported for other calendars, will cause an error + # assert Undate("XXXX", calendar="Islamic").duration().days == UnInt(354, 355) + # + print(Undate("536X", calendar="Hebrew").duration()) + assert Undate("536X", calendar="Hebrew").duration().days == UnInt(353, 385) + def test_known_year(self): assert Undate(2022).known_year is True assert Undate(month=2, day=5).known_year is False From 5bb57c0702ff2e81df6d16c6d7599734c481e549 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Wed, 25 Jun 2025 15:11:59 -0400 Subject: [PATCH 02/12] Clean up flagged by @coderabbitai with additional refactoring --- src/undate/converters/base.py | 19 +++++++- src/undate/converters/calendars/seleucid.py | 4 ++ src/undate/undate.py | 48 ++++++--------------- tests/test_undate.py | 4 +- 4 files changed, 37 insertions(+), 38 deletions(-) diff --git a/src/undate/converters/base.py b/src/undate/converters/base.py index 9cb0b59..27698c5 100644 --- a/src/undate/converters/base.py +++ b/src/undate/converters/base.py @@ -48,6 +48,8 @@ from functools import cache from typing import Dict, Type +from undate.date import Date + logger = logging.getLogger(__name__) @@ -58,6 +60,10 @@ class BaseDateConverter: #: Converter name. Subclasses must define a unique name. name: str = "Base Converter" + # provisional... + LEAP_YEAR = 0 + NON_LEAP_YEAR = 0 + def parse(self, value: str): """ Parse a string and return an :class:`~undate.undate.Undate` or @@ -163,8 +169,17 @@ def max_day(self, year: int, month: int) -> int: raise NotImplementedError def days_in_year(self, year: int) -> int: - """number of days in the specified year in this calendar""" - raise NotImplementedError + """Number of days in the specified year in this calendar. The default implementation + uses min and max month and max day methods along with Gregorian conversion method + to calculate the number of days in the specified year. 
+ """ + year_start = Date(*self.to_gregorian(year, self.min_month(), 1)) + last_month = self.max_month(year) + year_end = Date( + *self.to_gregorian(year, last_month, self.max_day(year, last_month)) + ) + # add 1 because the difference doesn't include the end point + return (year_end - year_start).days + 1 def to_gregorian(self, year, month, day) -> tuple[int, int, int]: """Convert a date for this calendar specified by numeric year, month, and day, diff --git a/src/undate/converters/calendars/seleucid.py b/src/undate/converters/calendars/seleucid.py index bddf867..ae54965 100644 --- a/src/undate/converters/calendars/seleucid.py +++ b/src/undate/converters/calendars/seleucid.py @@ -22,3 +22,7 @@ def to_gregorian(self, year: int, month: int, day: int) -> tuple[int, int, int]: logic with :attr:`SELEUCID_OFFSET`. Returns a tuple of year, month, day. """ return super().to_gregorian(year + self.SELEUCID_OFFSET, month, day) + + def days_in_year(self, year: int) -> int: + """the number of days in the specified year for this calendar""" + return super().days_in_year(year + self.SELEUCID_OFFSET) diff --git a/src/undate/undate.py b/src/undate/undate.py index 9732584..63f85a4 100644 --- a/src/undate/undate.py +++ b/src/undate/undate.py @@ -19,7 +19,7 @@ # Pre 3.10 requires Union for multiple types, e.g. 
Union[int, None] instead of int | None from typing import Dict, Optional, Union -from undate.converters.base import BaseDateConverter +from undate.converters.base import BaseCalendarConverter, BaseDateConverter from undate.date import ONE_DAY, Date, DatePrecision, Timedelta, UnDelta @@ -32,10 +32,14 @@ class Calendar(StrEnum): SELEUCID = auto() @staticmethod - def get_converter(calendar): + def get_converter(calendar) -> BaseCalendarConverter: # calendar converter must be available with a name matching # the title-case name of the calendar enum entry converter_cls = BaseDateConverter.available_converters()[calendar.value.title()] + if not issubclass(converter_cls, BaseCalendarConverter): + raise ValueError( + f"Requested converter {converter_cls} is not a CalendarConverter" + ) return converter_cls() @@ -492,13 +496,12 @@ def duration(self) -> Timedelta | UnDelta: self.calendar_converter.LEAP_YEAR, self.calendar_converter.NON_LEAP_YEAR, ] - possible_max_days = None + possible_max_days = set() # if precision is month and year is unknown, # calculate month duration within a single year (not min/max) if self.precision == DatePrecision.MONTH: # for every possible month and year, get max days for that month, - possible_max_days = set() # appease mypy, which says month values could be None here; # Date object allows optional month, but earliest/latest initialization # should always be day-precision dates @@ -511,40 +514,17 @@ def duration(self) -> Timedelta | UnDelta: # if precision is year but year is unknown, return an uncertain delta elif self.precision == DatePrecision.YEAR: - possible_max_days = set() # this is currently hebrew-specific due to the way the start/end of year wraps for that calendar - try: - possible_max_days = set( - [self.calendar_converter.days_in_year(y) for y in possible_years] - ) - except NotImplementedError: - pass - - if not possible_max_days: - for year in possible_years: - # TODO: shift logic to calendars for parity with Hebrew? 
- year_start = Date( - *self.calendar_converter.to_gregorian( - year, self.calendar_converter.min_month(), 1 - ) - ) - last_month = self.calendar_converter.max_month(year) - year_end = Date( - *self.calendar_converter.to_gregorian( - year, - last_month, - self.calendar_converter.max_day(year, last_month), - ) - ) - - year_days = (year_end - year_start).days + 1 - possible_max_days.add(year_days) + # with contextlib.suppress(NotImplementedError): + possible_max_days = { + self.calendar_converter.days_in_year(y) for y in possible_years + } # if there is more than one possible value for number of days # due to range including lear year / non-leap year, return an uncertain delta - if possible_max_days and len(possible_max_days) > 1: - return UnDelta(*possible_max_days) - else: + if possible_max_days: + if len(possible_max_days) > 1: + return UnDelta(*possible_max_days) return Timedelta(possible_max_days.pop()) # otherwise, subtract earliest from latest and add a day to include start day in the count diff --git a/tests/test_undate.py b/tests/test_undate.py index 865fcb2..51409fd 100644 --- a/tests/test_undate.py +++ b/tests/test_undate.py @@ -448,9 +448,9 @@ def test_partiallyknownyear_duration(self): assert Undate("108X", calendar="Islamic").duration().days == UnInt(354, 355) # NOTE: completely unknown years is not yet supported for other calendars, will cause an error # assert Undate("XXXX", calendar="Islamic").duration().days == UnInt(354, 355) - # - print(Undate("536X", calendar="Hebrew").duration()) assert Undate("536X", calendar="Hebrew").duration().days == UnInt(353, 385) + # different set of years could vary + assert Undate("53X2", calendar="Hebrew").duration().days == UnInt(354, 385) def test_known_year(self): assert Undate(2022).known_year is True From e6b96df738b779e92d9c2ac4861333cfd67793ca Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Wed, 25 Jun 2025 16:00:08 -0400 Subject: [PATCH 03/12] Refactor and test logic for possible years --- src/undate/undate.py 
| 66 ++++++++++++++++++++++++++++++-------------- tests/test_undate.py | 29 +++++++++++++++++++ 2 files changed, 75 insertions(+), 20 deletions(-) diff --git a/src/undate/undate.py b/src/undate/undate.py index 63f85a4..776033b 100644 --- a/src/undate/undate.py +++ b/src/undate/undate.py @@ -422,7 +422,9 @@ def is_known(self, part: str) -> bool: return isinstance(self.initial_values[part], int) def is_partially_known(self, part: str) -> bool: + # TODO: should XX / XXXX really be considered partially known? other code seems to assume this, so we'll preserve the behavior return isinstance(self.initial_values[part], str) + # and self.initial_values[part].replace(self.MISSING_DIGIT, "") != "" @property def year(self) -> Optional[str]: @@ -464,6 +466,47 @@ def _get_date_part(self, part: str) -> Optional[str]: value = self.initial_values.get(part) return str(value) if value else None + @property + def possible_years(self) -> list[int] | range: + """A list or range of possible years for this date in the original calendar. + Returns a list with a single year for dates with fully-known years.""" + if self.known_year: + return [self.earliest.year] + + step = 1 + if ( + self.is_partially_known("year") + and str(self.year).replace(self.MISSING_DIGIT, "") != "" + ): + # determine the smallest step size for the missing digit + earliest_year = int(str(self.year).replace(self.MISSING_DIGIT, "0")) + latest_year = int(str(self.year).replace(self.MISSING_DIGIT, "9")) + missing_digit_place = len(str(self.year)) - str(self.year).rfind( + self.MISSING_DIGIT + ) + # convert place to 1, 10, 100, 1000, etc. + step = 10 ** (missing_digit_place - 1) + return range(earliest_year, latest_year + 1, step) + else: # year is fully unknown + # returning range from min year to max year is not useful in any scenario! 
+ raise ValueError( + "Possible years cannot be returned for completely unknown year" + ) + + return [] # shouldn't get here, but mypy complains + + @property + def representative_years(self) -> list[int]: + """A list of representative years for this date.""" + try: + # todo: filter by calendar to minimum needed + return list(self.possible_years) + except ValueError: + return [ + self.calendar_converter.LEAP_YEAR, + self.calendar_converter.NON_LEAP_YEAR, + ] + def duration(self) -> Timedelta | UnDelta: """What is the duration of this date? Calculate based on earliest and latest date within range, @@ -478,24 +521,6 @@ def duration(self) -> Timedelta | UnDelta: if self.precision == DatePrecision.DAY: return ONE_DAY - # if year is unknown or partially unknown, month and year duration both need to calculate for - # variant years (leap year, non-leap year), since length may vary - possible_years = [self.earliest.year] - if self.is_partially_known("year"): - # if year is partially known (e.g. 191X), get all possible years in range - # TODO: refactor into a function/property; combine/extract from missing digit min/max method - possible_years = [ - int(str(self.year).replace(self.MISSING_DIGIT, str(digit))) - for digit in range(0, 10) - ] - # TODO: once this is working, make more efficient by only getting representative years from the calendar - elif not self.known_year: # completely unknown year - # TODO: should leap-year specific logic shift to the calendars, - # since it works differently depending on the calendar? 
- possible_years = [ - self.calendar_converter.LEAP_YEAR, - self.calendar_converter.NON_LEAP_YEAR, - ] possible_max_days = set() # if precision is month and year is unknown, @@ -507,7 +532,7 @@ def duration(self) -> Timedelta | UnDelta: # should always be day-precision dates if self.earliest.month is not None and self.latest.month is not None: for possible_month in range(self.earliest.month, self.latest.month + 1): - for year in possible_years: + for year in self.representative_years: possible_max_days.add( self.calendar_converter.max_day(year, possible_month) ) @@ -517,7 +542,8 @@ def duration(self) -> Timedelta | UnDelta: # this is currently hebrew-specific due to the way the start/end of year wraps for that calendar # with contextlib.suppress(NotImplementedError): possible_max_days = { - self.calendar_converter.days_in_year(y) for y in possible_years + self.calendar_converter.days_in_year(y) + for y in self.representative_years } # if there is more than one possible value for number of days diff --git a/tests/test_undate.py b/tests/test_undate.py index 51409fd..974fbee 100644 --- a/tests/test_undate.py +++ b/tests/test_undate.py @@ -393,6 +393,35 @@ def test_sorting(self): # someyear = Undate("1XXX") # assert sorted([d1991, someyear]) == [someyear, d1991] + def test_possible_years(self): + assert Undate(1991).possible_years == [1991] + assert Undate("190X").possible_years == range(1900, 1910) + assert Undate("19XX").possible_years == range(1900, 2000) + # uses step when missing digit is not last digit + assert Undate("19X1").possible_years == range(1901, 1992, 10) + assert Undate("2X25").possible_years == range(2025, 2926, 100) + assert Undate("1XXX").possible_years == range(1000, 2000) + # completely unknown year raises value error, because the range is not useful + with pytest.raises( + ValueError, match="cannot be returned for completely unknown year" + ): + Undate("XXXX").possible_years + + def test_representative_years(self): + assert 
Undate("1991").representative_years == [1991] + assert Undate("190X").representative_years == [ + 1900, + 1901, + 1902, + 1903, + 1904, + 1905, + 1906, + 1907, + 1908, + 1909, + ] + def test_duration(self): day_duration = Undate(2022, 11, 7).duration() assert isinstance(day_duration, Timedelta) From e0bac94c43a85ca0d9ddf4e52bb0acef53151b33 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Wed, 25 Jun 2025 16:21:20 -0400 Subject: [PATCH 04/12] Implement representative years filtering method for Gregorian calendar --- src/undate/converters/base.py | 8 ++++++ src/undate/converters/calendars/gregorian.py | 29 +++++++++++++++++++- src/undate/undate.py | 8 +++++- tests/test_undate.py | 16 +++-------- 4 files changed, 47 insertions(+), 14 deletions(-) diff --git a/src/undate/converters/base.py b/src/undate/converters/base.py index 27698c5..e305310 100644 --- a/src/undate/converters/base.py +++ b/src/undate/converters/base.py @@ -181,6 +181,14 @@ def days_in_year(self, year: int) -> int: # add 1 because the difference doesn't include the end point return (year_end - year_start).days + 1 + def representative_years(self, years: None | list[int] = None) -> list[int]: + """Returns a list of representative years within the specified list. + Result should include one for each type of variant year for this + calendar (e.g., leap year and non-leap year). If no years are specified, + returns a list of representative years for the current calendar. + """ + raise NotImplementedError + def to_gregorian(self, year, month, day) -> tuple[int, int, int]: """Convert a date for this calendar specified by numeric year, month, and day, into the Gregorian equivalent date. Should return a tuple of year, month, day. 
diff --git a/src/undate/converters/calendars/gregorian.py b/src/undate/converters/calendars/gregorian.py index c0e0a19..b3b103b 100644 --- a/src/undate/converters/calendars/gregorian.py +++ b/src/undate/converters/calendars/gregorian.py @@ -1,4 +1,4 @@ -from calendar import monthrange +from calendar import monthrange, isleap from undate.converters.base import BaseCalendarConverter @@ -45,6 +45,33 @@ def max_day(self, year: int, month: int) -> int: return max_day + def representative_years(self, years: None | list[int] = None) -> list[int]: + """Takes a list of years and returns a subset with one leap year and one non-leap year. + If no years are specified, returns a known leap year and non-leap year. + """ + + # if years is unset or list is empty + if not years: + return [self.LEAP_YEAR, self.NON_LEAP_YEAR] + + found_leap = False + found_non_leap = False + rep_years = [] + for year in years: + if isleap(year): + if not found_leap: + found_leap = True + rep_years.append(year) + else: + if not found_non_leap: + found_non_leap = True + rep_years.append(year) + # stop as soon as we've found one example of each type of year + if found_leap and found_non_leap: + break + + return rep_years + def to_gregorian(self, year, month, day) -> tuple[int, int, int]: """Convert to Gregorian date. 
This returns the specified by year, month, and day unchanged, but is provided for consistency since all calendar diff --git a/src/undate/undate.py b/src/undate/undate.py index 776033b..f7d671b 100644 --- a/src/undate/undate.py +++ b/src/undate/undate.py @@ -500,7 +500,13 @@ def representative_years(self) -> list[int]: """A list of representative years for this date.""" try: # todo: filter by calendar to minimum needed - return list(self.possible_years) + try: + return self.calendar_converter.representative_years( + list(self.possible_years) + ) + except NotImplementedError: + # if calendar converter does not support representative years, return all years + return list(self.possible_years) except ValueError: return [ self.calendar_converter.LEAP_YEAR, diff --git a/tests/test_undate.py b/tests/test_undate.py index 974fbee..20b6b05 100644 --- a/tests/test_undate.py +++ b/tests/test_undate.py @@ -408,19 +408,11 @@ def test_possible_years(self): Undate("XXXX").possible_years def test_representative_years(self): + # single year is returned as is assert Undate("1991").representative_years == [1991] - assert Undate("190X").representative_years == [ - 1900, - 1901, - 1902, - 1903, - 1904, - 1905, - 1906, - 1907, - 1908, - 1909, - ] + # for an uncertain year, returns first leap year and non-leap year in range + assert Undate("190X").representative_years == [1900, 1904] + assert Undate("19XX").representative_years == [1900, 1904] def test_duration(self): day_duration = Undate(2022, 11, 7).duration() From 43bc97bc8467f38dbb57f11b68edb0d4702bad56 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Wed, 25 Jun 2025 16:39:05 -0400 Subject: [PATCH 05/12] Implement representative years for islamic calendar converter --- .../converters/calendars/islamic/converter.py | 31 +++++++++++++++++++ .../test_islamic/test_islamic_converter.py | 20 ++++++++++++ 2 files changed, 51 insertions(+) diff --git a/src/undate/converters/calendars/islamic/converter.py 
b/src/undate/converters/calendars/islamic/converter.py index c658c90..6995407 100644 --- a/src/undate/converters/calendars/islamic/converter.py +++ b/src/undate/converters/calendars/islamic/converter.py @@ -21,6 +21,11 @@ class IslamicDateConverter(BaseCalendarConverter): name: str = "Islamic" calendar_name: str = "Islamic" + #: arbitrary known non-leap year + NON_LEAP_YEAR: int = 1457 + #: arbitrary known leap year + LEAP_YEAR: int = 1458 + def __init__(self): self.transformer = IslamicDateTransformer() @@ -36,6 +41,32 @@ def max_month(self, year: int) -> int: """maximum numeric month for this calendar""" return 12 + def representative_years(self, years: None | list[int] = None) -> list[int]: + """Takes a list of years and returns a subset with one leap year and one non-leap year. + If no years are specified, returns a known leap year and non-leap year. + """ + + # if years is unset or list is empty + if not years: + return [self.LEAP_YEAR, self.NON_LEAP_YEAR] + found_leap = False + found_non_leap = False + rep_years = [] + for year in years: + if islamic.leap(year): + if not found_leap: + found_leap = True + rep_years.append(year) + else: + if not found_non_leap: + found_non_leap = True + rep_years.append(year) + # stop as soon as we've found one example of each type of year + if found_leap and found_non_leap: + break + + return rep_years + def to_gregorian(self, year: int, month: int, day: int) -> tuple[int, int, int]: """Convert a Hijri date, specified by year, month, and day, to the Gregorian equivalent date. Returns a tuple of year, month, day. 
diff --git a/tests/test_converters/test_calendars/test_islamic/test_islamic_converter.py b/tests/test_converters/test_calendars/test_islamic/test_islamic_converter.py index 4acacd0..cfcace2 100644 --- a/tests/test_converters/test_calendars/test_islamic/test_islamic_converter.py +++ b/tests/test_converters/test_calendars/test_islamic/test_islamic_converter.py @@ -152,3 +152,23 @@ def test_compare_across_calendars(self): ) expected_gregorian_years = [33, 1049, 1350, 1479, 1495, 1995] assert [d.earliest.year for d in sorted_dates] == expected_gregorian_years + + def test_representative_years(self): + converter = IslamicDateConverter() + # single year is not filtered + # 1458 is a leap year; 1457 and 1459 are not + assert converter.representative_years([1457]) == [1457] + # multiple non-leap years, returns just the first + assert converter.representative_years([1457, 1459]) == [1457] + # next leap year is 2028; returns first leap year and first non-leap year, in input order + assert converter.representative_years([1457, 1458, 1459]) == [1457, 1458] + + # if no years are provided, returns a known leap year and non-leap years + assert converter.representative_years() == [ + converter.LEAP_YEAR, + converter.NON_LEAP_YEAR, + ] + assert converter.representative_years([]) == [ + converter.LEAP_YEAR, + converter.NON_LEAP_YEAR, + ] From b436f33a7b4784e41ddd3389ed04b21b48fe108f Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Wed, 25 Jun 2025 16:55:21 -0400 Subject: [PATCH 06/12] Implement representative years for hebrew calendar converter --- .../converters/calendars/hebrew/converter.py | 31 +++++++++++++++++++ .../test_hebrew/test_hebrew_converter.py | 27 ++++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/src/undate/converters/calendars/hebrew/converter.py b/src/undate/converters/calendars/hebrew/converter.py index de7c1ac..165d67e 100644 --- a/src/undate/converters/calendars/hebrew/converter.py +++ b/src/undate/converters/calendars/hebrew/converter.py @@ -21,6 
+21,11 @@ class HebrewDateConverter(BaseCalendarConverter): name: str = "Hebrew" calendar_name: str = "Anno Mundi" + #: arbitrary known non-leap year; 4816 is a non-leap year with 353 days (minimum possible) + NON_LEAP_YEAR: int = 4816 + #: arbitrary known leap year; 4837 is a leap year with 385 days (maximum possible) + LEAP_YEAR: int = 4837 + def __init__(self): self.transformer = HebrewDateTransformer() @@ -51,6 +56,32 @@ def days_in_year(self, year: int) -> int: """the number of days in the specified year for this calendar""" return int(hebrew.year_days(year)) + def representative_years(self, years: None | list[int] = None) -> list[int]: + """Takes a list of years and returns a subset with all possible variations in number of days. + If no years are specified, returns ... + """ + + year_lengths = set() + max_year_lengths = 6 # there are 6 different possible length years + + # if years is unset or list is empty + if not years: + # NOTE: this does not cover all possible lengths, but should cover min/max + return [self.LEAP_YEAR, self.NON_LEAP_YEAR] + + rep_years = [] + for year in years: + days = self.days_in_year(year) + if days not in year_lengths: + year_lengths.add(days) + rep_years.append(year) + + # stop if we find one example of each type of year + if len(year_lengths) == max_year_lengths: + break + + return rep_years + def to_gregorian(self, year: int, month: int, day: int) -> tuple[int, int, int]: """Convert a Hebrew date, specified by year, month, and day, to the Gregorian equivalent date. Returns a tuple of year, month, day. 
diff --git a/tests/test_converters/test_calendars/test_hebrew/test_hebrew_converter.py b/tests/test_converters/test_calendars/test_hebrew/test_hebrew_converter.py index c3c8b7c..538082d 100644 --- a/tests/test_converters/test_calendars/test_hebrew/test_hebrew_converter.py +++ b/tests/test_converters/test_calendars/test_hebrew/test_hebrew_converter.py @@ -153,3 +153,30 @@ def test_compare_across_calendars(self): ) expected_gregorian_years = [-3261, 33, 1056, 1350, 1655, 1995] assert [d.earliest.year for d in sorted_dates] == expected_gregorian_years + + def test_representative_years(self): + converter = HebrewDateConverter() + # single year is not filtered + assert converter.representative_years([4816]) == [4816] + # 4816 has 353 days; 4817 has 355; 4818 has 384; 4819 has 355 + assert converter.representative_years([4816, 4817, 4818, 4819]) == [ + 4816, + 4817, + 4818, + ] + assert converter.representative_years([4816, 4817, 4818, 4819, 4837]) == [ + 4816, + 4817, + 4818, + 4837, + ] + + # if no years are provided, returns a known leap year and non-leap years + assert converter.representative_years() == [ + converter.LEAP_YEAR, + converter.NON_LEAP_YEAR, + ] + assert converter.representative_years([]) == [ + converter.LEAP_YEAR, + converter.NON_LEAP_YEAR, + ] From c5c1cc80e4de37ccd17b09cd3c7b1cf01aab0f48 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Wed, 25 Jun 2025 17:23:57 -0400 Subject: [PATCH 07/12] Support duration for unknown hebrew & islamic years --- src/undate/converters/base.py | 10 ++++++++++ .../converters/calendars/islamic/converter.py | 8 ++++++++ src/undate/undate.py | 13 ++++++++----- tests/test_undate.py | 6 ++++-- 4 files changed, 30 insertions(+), 7 deletions(-) diff --git a/src/undate/converters/base.py b/src/undate/converters/base.py index e305310..1cf1b6d 100644 --- a/src/undate/converters/base.py +++ b/src/undate/converters/base.py @@ -148,6 +148,16 @@ class BaseCalendarConverter(BaseDateConverter): #: Converter name. 
Subclasses must define a unique name. name: str = "Base Calendar Converter" + #: arbitrary known non-leap year + NON_LEAP_YEAR: int + #: arbitrary known leap year + LEAP_YEAR: int + + # minimum year for this calendar, if there is one + MIN_YEAR: None | int = None + # maximum year for this calendar, if there is one + MAX_YEAR: None | int = None + def min_month(self) -> int: """Smallest numeric month for this calendar.""" raise NotImplementedError diff --git a/src/undate/converters/calendars/islamic/converter.py b/src/undate/converters/calendars/islamic/converter.py index 6995407..b55d20b 100644 --- a/src/undate/converters/calendars/islamic/converter.py +++ b/src/undate/converters/calendars/islamic/converter.py @@ -26,6 +26,11 @@ class IslamicDateConverter(BaseCalendarConverter): #: arbitrary known leap year LEAP_YEAR: int = 1458 + # minimum year for islamic calendar is 1 AH, does not go negative + MIN_YEAR: None | int = 1 + # convertdate gives a month 34 for numpy max year 2.5e16, so scale it back a bit + MAX_YEAR = int(2.5e12) + def __init__(self): self.transformer = IslamicDateTransformer() @@ -71,6 +76,9 @@ def to_gregorian(self, year: int, month: int, day: int) -> tuple[int, int, int]: """Convert a Hijri date, specified by year, month, and day, to the Gregorian equivalent date. Returns a tuple of year, month, day. 
""" + print( + f"islamic.to_gregorian(year={year}, month={month}, day={day}) = {islamic.to_gregorian(year, month, day)}" + ) return islamic.to_gregorian(year, month, day) def parse(self, value: str) -> Union[Undate, UndateInterval]: diff --git a/src/undate/undate.py b/src/undate/undate.py index f7d671b..8b9da41 100644 --- a/src/undate/undate.py +++ b/src/undate/undate.py @@ -128,11 +128,11 @@ def calculate_earliest_latest(self, year, month, day): min_year = int(str(year).replace(self.MISSING_DIGIT, "0")) max_year = int(str(year).replace(self.MISSING_DIGIT, "9")) else: - # use the configured min/max allowable years if we - # don't have any other bounds - # TODO: make calendar-specific? these are min/max for gregorian - min_year = self.MIN_ALLOWABLE_YEAR - max_year = self.MAX_ALLOWABLE_YEAR + # if we don't have any other bounds, + # use calendar-specific min year if there is one, otherwise use + # the configured min/max allowable years + min_year = self.calendar_converter.MIN_YEAR or self.MIN_ALLOWABLE_YEAR + max_year = self.calendar_converter.MAX_YEAR or self.MAX_ALLOWABLE_YEAR # if month is passed in as a string but completely unknown, # treat as unknown/none (date precision already set in init) @@ -192,6 +192,9 @@ def calculate_earliest_latest(self, year, month, day): self.earliest = Date( *self.calendar_converter.to_gregorian(min_year, earliest_month, min_day) ) + print( + f"initializing latest, year={max_year} month={latest_month} day={max_day}" + ) self.latest = Date( *self.calendar_converter.to_gregorian(max_year, latest_month, max_day) ) diff --git a/tests/test_undate.py b/tests/test_undate.py index 20b6b05..d4d46af 100644 --- a/tests/test_undate.py +++ b/tests/test_undate.py @@ -467,11 +467,13 @@ def test_partiallyknownyear_duration(self): # year duration logic should work in other calendars # islamic assert Undate("108X", calendar="Islamic").duration().days == UnInt(354, 355) - # NOTE: completely unknown years is not yet supported for other calendars, will 
cause an error - # assert Undate("XXXX", calendar="Islamic").duration().days == UnInt(354, 355) + # completely unknown years is calculated based on representative years + assert Undate("XXXX", calendar="Islamic").duration().days == UnInt(354, 355) assert Undate("536X", calendar="Hebrew").duration().days == UnInt(353, 385) # different set of years could vary assert Undate("53X2", calendar="Hebrew").duration().days == UnInt(354, 385) + # fully unknown year also works for Hebrew calendar + assert Undate("XXX", calendar="Hebrew").duration().days == UnInt(353, 385) def test_known_year(self): assert Undate(2022).known_year is True From 5119ccd29f02028669ed5ae4117a35d6fa92a793 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Wed, 25 Jun 2025 17:41:45 -0400 Subject: [PATCH 08/12] Add unit tests for missing coverage --- src/undate/undate.py | 9 +++++-- .../test_hebrew/test_hebrew_converter.py | 7 +++++ .../test_calendars/test_seleucid.py | 6 +++++ tests/test_undate.py | 26 ++++++++++++++++++- 4 files changed, 45 insertions(+), 3 deletions(-) diff --git a/src/undate/undate.py b/src/undate/undate.py index 8b9da41..a1d7eba 100644 --- a/src/undate/undate.py +++ b/src/undate/undate.py @@ -35,10 +35,15 @@ class Calendar(StrEnum): def get_converter(calendar) -> BaseCalendarConverter: # calendar converter must be available with a name matching # the title-case name of the calendar enum entry - converter_cls = BaseDateConverter.available_converters()[calendar.value.title()] + try: + converter_cls = BaseDateConverter.available_converters()[ + calendar.value.title() + ] + except KeyError: + raise ValueError(f"Unknown calendar '{calendar}'") if not issubclass(converter_cls, BaseCalendarConverter): raise ValueError( - f"Requested converter {converter_cls} is not a CalendarConverter" + f"Requested converter '{calendar.value.title()}' is not a CalendarConverter" ) return converter_cls() diff --git a/tests/test_converters/test_calendars/test_hebrew/test_hebrew_converter.py 
b/tests/test_converters/test_calendars/test_hebrew/test_hebrew_converter.py index 538082d..6fe8c96 100644 --- a/tests/test_converters/test_calendars/test_hebrew/test_hebrew_converter.py +++ b/tests/test_converters/test_calendars/test_hebrew/test_hebrew_converter.py @@ -154,6 +154,13 @@ def test_compare_across_calendars(self): expected_gregorian_years = [-3261, 33, 1056, 1350, 1655, 1995] assert [d.earliest.year for d in sorted_dates] == expected_gregorian_years + def test_days_in_year(self): + converter = HebrewDateConverter() + assert converter.days_in_year(4816) == 353 + assert converter.days_in_year(4817) == 355 + assert converter.days_in_year(4818) == 384 + assert converter.days_in_year(4819) == 355 + def test_representative_years(self): converter = HebrewDateConverter() # single year is not filtered diff --git a/tests/test_converters/test_calendars/test_seleucid.py b/tests/test_converters/test_calendars/test_seleucid.py index fd8bc82..d07e5f1 100644 --- a/tests/test_converters/test_calendars/test_seleucid.py +++ b/tests/test_converters/test_calendars/test_seleucid.py @@ -53,6 +53,12 @@ def test_gregorian_earliest_latest(self): assert date.latest == Date(1146, 10, 15) assert date.label == f"{date_str} {SeleucidDateConverter.calendar_name}" + def test_days_in_year(self): + converter = SeleucidDateConverter() + assert converter.days_in_year(2350) == 354 + assert converter.days_in_year(2349) == 385 + assert converter.days_in_year(2351) == 355 + # TODO: update validation error to say seleucid instead of hebrew diff --git a/tests/test_undate.py b/tests/test_undate.py index d4d46af..945b2fa 100644 --- a/tests/test_undate.py +++ b/tests/test_undate.py @@ -1,9 +1,11 @@ from datetime import date, datetime +from enum import auto import pytest from undate import Undate, UndateInterval, Calendar -from undate.converters.base import BaseCalendarConverter +from undate.undate import StrEnum # import whichever version is used there +from undate.converters.base import 
BaseCalendarConverter, BaseDateConverter from undate.date import Date, DatePrecision, Timedelta, UnDelta, UnInt @@ -545,3 +547,25 @@ def test_calendar_get_converter(): converter = Calendar.get_converter(cal) assert isinstance(converter, BaseCalendarConverter) assert converter.name.lower() == cal.name.lower() + + class BogusCalendar(StrEnum): + """Unsupported calendars""" + + FOOBAR = auto() + DUMMY = auto() + + # test error handling + # ensure we raise a ValueError when an invalid calendar is requested + with pytest.raises(ValueError, match="Unknown calendar"): + Calendar.get_converter(BogusCalendar.FOOBAR) + + class DummyFormatter(BaseDateConverter): + name = "Dummy" + + # also error if you request a converter that is not a calendar converter + # NOTE: this fails because get_converter converts the enum to title case... + # can't be tested with any of the existing non-calendar converters + with pytest.raises( + ValueError, match="Requested converter 'Dummy' is not a CalendarConverter" + ): + Calendar.get_converter(BogusCalendar.DUMMY) From bda835ca5cd5024651049ed752da131bb6bedfcc Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Wed, 25 Jun 2025 18:06:39 -0400 Subject: [PATCH 09/12] Clean up print statements and other minor items flagged by @coderabbitai --- .../converters/calendars/islamic/converter.py | 4 +--- src/undate/undate.py | 18 +++++++----------- tests/test_undate.py | 2 +- 3 files changed, 9 insertions(+), 15 deletions(-) diff --git a/src/undate/converters/calendars/islamic/converter.py b/src/undate/converters/calendars/islamic/converter.py index b55d20b..67f2a64 100644 --- a/src/undate/converters/calendars/islamic/converter.py +++ b/src/undate/converters/calendars/islamic/converter.py @@ -76,9 +76,7 @@ def to_gregorian(self, year: int, month: int, day: int) -> tuple[int, int, int]: """Convert a Hijri date, specified by year, month, and day, to the Gregorian equivalent date. Returns a tuple of year, month, day. 
""" - print( - f"islamic.to_gregorian(year={year}, month={month}, day={day}) = {islamic.to_gregorian(year, month, day)}" - ) + # NOTE: this results in weird numbers for months when year gets sufficiently high return islamic.to_gregorian(year, month, day) def parse(self, value: str) -> Union[Undate, UndateInterval]: diff --git a/src/undate/undate.py b/src/undate/undate.py index a1d7eba..e6561bf 100644 --- a/src/undate/undate.py +++ b/src/undate/undate.py @@ -39,8 +39,8 @@ def get_converter(calendar) -> BaseCalendarConverter: converter_cls = BaseDateConverter.available_converters()[ calendar.value.title() ] - except KeyError: - raise ValueError(f"Unknown calendar '{calendar}'") + except KeyError as err: + raise ValueError(f"Unknown calendar '{calendar}'") from err if not issubclass(converter_cls, BaseCalendarConverter): raise ValueError( f"Requested converter '{calendar.value.title()}' is not a CalendarConverter" @@ -197,9 +197,6 @@ def calculate_earliest_latest(self, year, month, day): self.earliest = Date( *self.calendar_converter.to_gregorian(min_year, earliest_month, min_day) ) - print( - f"initializing latest, year={max_year} month={latest_month} day={max_day}" - ) self.latest = Date( *self.calendar_converter.to_gregorian(max_year, latest_month, max_day) ) @@ -495,13 +492,12 @@ def possible_years(self) -> list[int] | range: # convert place to 1, 10, 100, 1000, etc. step = 10 ** (missing_digit_place - 1) return range(earliest_year, latest_year + 1, step) - else: # year is fully unknown - # returning range from min year to max year is not useful in any scenario! - raise ValueError( - "Possible years cannot be returned for completely unknown year" - ) - return [] # shouldn't get here, but mypy complains + # otherwise, year is fully unknown + # returning range from min year to max year is not useful in any scenario! 
+ raise ValueError( + "Possible years cannot be returned for completely unknown year" + ) @property def representative_years(self) -> list[int]: diff --git a/tests/test_undate.py b/tests/test_undate.py index 945b2fa..ef9ff42 100644 --- a/tests/test_undate.py +++ b/tests/test_undate.py @@ -407,7 +407,7 @@ def test_possible_years(self): with pytest.raises( ValueError, match="cannot be returned for completely unknown year" ): - Undate("XXXX").possible_years + assert Undate("XXXX").possible_years def test_representative_years(self): # single year is returned as is From 7af0628768744218c2b6646a92d181e5456794db Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Wed, 25 Jun 2025 18:16:04 -0400 Subject: [PATCH 10/12] Additional testing --- tests/test_converters/test_base.py | 2 ++ tests/test_undate.py | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/tests/test_converters/test_base.py b/tests/test_converters/test_base.py index a4ac52d..6265c15 100644 --- a/tests/test_converters/test_base.py +++ b/tests/test_converters/test_base.py @@ -91,3 +91,5 @@ def test_not_implemented(self): BaseCalendarConverter().max_day(1900, 12) with pytest.raises(NotImplementedError): BaseCalendarConverter().to_gregorian(1900, 12, 31) + with pytest.raises(NotImplementedError): + BaseCalendarConverter().representative_years([1900, 1901]) diff --git a/tests/test_undate.py b/tests/test_undate.py index ef9ff42..2cbaf7d 100644 --- a/tests/test_undate.py +++ b/tests/test_undate.py @@ -1,5 +1,6 @@ from datetime import date, datetime from enum import auto +from unittest import mock import pytest @@ -415,6 +416,24 @@ def test_representative_years(self): # for an uncertain year, returns first leap year and non-leap year in range assert Undate("190X").representative_years == [1900, 1904] assert Undate("19XX").representative_years == [1900, 1904] + # works for other calendars + assert Undate("481X", calendar="Hebrew").representative_years == [ + 4810, + 4811, + 4812, + 4813, + 4816, + 4818, 
+ ] + + # use mock to simulate a calendar without representative years filtering + with mock.patch( + "undate.converters.calendars.HebrewDateConverter.representative_years" + ) as mock_representative_years: + mock_representative_years.side_effect = NotImplementedError + assert Undate("481X", calendar="Hebrew").representative_years == list( + range(4810, 4820) + ) def test_duration(self): day_duration = Undate(2022, 11, 7).duration() From b5e9c7fcb520386fa15f5a59aea2809efcea0ebf Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Wed, 25 Jun 2025 19:40:42 -0400 Subject: [PATCH 11/12] Add test for gregorian converter to version control --- .../test_calendars/test_gregorian.py | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 tests/test_converters/test_calendars/test_gregorian.py diff --git a/tests/test_converters/test_calendars/test_gregorian.py b/tests/test_converters/test_calendars/test_gregorian.py new file mode 100644 index 0000000..ec0c599 --- /dev/null +++ b/tests/test_converters/test_calendars/test_gregorian.py @@ -0,0 +1,35 @@ +from undate.converters.calendars import GregorianDateConverter + + +class TestGregorianDateConverter: + def test_to_gregorian(self): + converter = GregorianDateConverter() + # conversion is a no-op, returns values unchanged + assert converter.to_gregorian(2025, 6, 15) == (2025, 6, 15) + + def test_min_month(self): + assert GregorianDateConverter().min_month() == 1 + + def test_max_month(self): + assert GregorianDateConverter().max_month(2025) == 12 + + # todo: test max day + # + def test_representative_years(self): + converter = GregorianDateConverter() + # single year is not filtered + assert converter.representative_years([2025]) == [2025] + # multiple non-leap years, returns just the first + assert converter.representative_years([2025, 2026]) == [2025] + # next leap year is 2028; returns first leap year and first non-leap year, in input order + assert converter.representative_years([2025, 2026, 2028, 2029]) == 
[2025, 2028] + + # if no years are provided, returns a known leap year and non-leap years + assert converter.representative_years() == [ + converter.LEAP_YEAR, + converter.NON_LEAP_YEAR, + ] + assert converter.representative_years([]) == [ + converter.LEAP_YEAR, + converter.NON_LEAP_YEAR, + ] From 7170433c754e112e8154f999424665e35d76c09f Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Wed, 25 Jun 2025 22:18:21 -0400 Subject: [PATCH 12/12] Fix typo in comment and add tests for Gregorian month max day --- .../test_converters/test_calendars/test_gregorian.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tests/test_converters/test_calendars/test_gregorian.py b/tests/test_converters/test_calendars/test_gregorian.py index ec0c599..e0bf5ef 100644 --- a/tests/test_converters/test_calendars/test_gregorian.py +++ b/tests/test_converters/test_calendars/test_gregorian.py @@ -13,8 +13,13 @@ def test_min_month(self): def test_max_month(self): assert GregorianDateConverter().max_month(2025) == 12 - # todo: test max day - # + def test_max_day(self): + converter = GregorianDateConverter() + assert converter.max_day(2025, 1) == 31 + assert converter.max_day(2025, 2) == 28 + assert converter.max_day(converter.LEAP_YEAR, 2) == 29 + assert converter.max_day(2025, 12) == 31 + def test_representative_years(self): converter = GregorianDateConverter() # single year is not filtered @@ -24,7 +29,7 @@ def test_representative_years(self): # next leap year is 2028; returns first leap year and first non-leap year, in input order assert converter.representative_years([2025, 2026, 2028, 2029]) == [2025, 2028] - # if no years are provided, returns a known leap year and non-leap years + # if no years are provided, returns a known leap year and non-leap year assert converter.representative_years() == [ converter.LEAP_YEAR, converter.NON_LEAP_YEAR,