From 1148c905ecac8defb51961b8760a276f6377711e Mon Sep 17 00:00:00 2001 From: Robert Casties Date: Thu, 10 Nov 2022 15:28:58 +0100 Subject: [PATCH 01/77] add ismi sample data (see #17) --- examples/ismi/README.md | 24 +++ examples/ismi/data/ismi-om4-date-samples.json | 186 ++++++++++++++++++ 2 files changed, 210 insertions(+) create mode 100644 examples/ismi/README.md create mode 100644 examples/ismi/data/ismi-om4-date-samples.json diff --git a/examples/ismi/README.md b/examples/ismi/README.md new file mode 100644 index 0000000..2340bc9 --- /dev/null +++ b/examples/ismi/README.md @@ -0,0 +1,24 @@ +# Sample data from the ISMI project database + +The [Islamic Scientific Manuscript Initiative project](https://ismi.mpwig-berlin.mpg.de) aims to collect information about all Islamic Manuscripts in the exact sciences from the 9th to the 19th centuries CE. + +The old [ISMI database](https://gitlab.gwdg.de/MPIWG/Department-II/ismi-project) database OpenMind (OM4) stores historical dates as JSON objects with the following structure: + +- `state` + - "unknown": no date + - "not checked": unparsed date in `date_in_text` + - "known": date or date range entered in specified calendar + - `calendar_type`: calendar the date was entered in + - "Julian", "Islamic", "Gregorian" + - `input_form`: date type + - "Year" + - `from`: first day, `until`: last day of year (dates in Gregorian calendar, ambiguity of +-2 days when entered in Islamic calendar) + - "Range" + - `from`: first day, `until`: last day of range (dates in Gregorian calendar, ambiguity of +-2 days when entered in Islamic calendar) + - "Date" + - `date`: given day (date in Gregorian calendar, ambiguity of +-2 days when entered in Islamic calendar) + - `additional_info`: textual note with additional information + +We plan to extract as much as possible of this data in the migration to the new RDF database with a CIDOC-CRM-based data model. 
+ +A sample file with dates of each type can be found in `data/ismi-om4-date-samples.json` diff --git a/examples/ismi/data/ismi-om4-date-samples.json b/examples/ismi/data/ismi-om4-date-samples.json new file mode 100644 index 0000000..4300195 --- /dev/null +++ b/examples/ismi/data/ismi-om4-date-samples.json @@ -0,0 +1,186 @@ +[ + { + "state": "unknown" + }, + { + "date_in_text": "8th/14th century", + "state": "not checked" + }, + { + "additional_info": "year 1233 in Julian calendar", + "calendar_type": "Julian", + "from": { + "ambiguity": 0, + "century": 13, + "dayOfMonth": 8, + "dayOfYear": 8, + "month": 1, + "year": 1232 + }, + "input_form": "Year", + "state": "known", + "until": { + "ambiguity": 0, + "century": 13, + "dayOfMonth": 7, + "dayOfYear": 7, + "month": 1, + "year": 1233 + }, + "year": 1232 + }, + { + "additional_info": "year 1205 in Islamic calendar", + "calendar_type": "Islamic", + "from": { + "ambiguity": 2, + "century": 18, + "dayOfMonth": 10, + "dayOfYear": 253, + "month": 9, + "year": 1790 + }, + "input_form": "Year", + "state": "known", + "until": { + "ambiguity": 2, + "century": 18, + "dayOfMonth": 29, + "dayOfYear": 241, + "month": 8, + "year": 1791 + }, + "year": 1205 + }, + { + "additional_info": "year 1564 in Gregorian calendar", + "calendar_type": "Gregorian", + "from": { + "ambiguity": 0, + "century": 16, + "dayOfMonth": 1, + "dayOfYear": 1, + "month": 1, + "year": 1564 + }, + "input_form": "Year", + "state": "known", + "until": { + "ambiguity": 0, + "century": 16, + "dayOfMonth": 31, + "dayOfYear": 366, + "month": 12, + "year": 1564 + }, + "year": 1564 + }, + { + "additional_info": "3. Martius(3) 1481 (1481-03-03) in Julian calendar (1481-03-12 Gregorian)", + "calendar_type": "Julian", + "date": { + "ambiguity": 0, + "century": 15, + "dayOfMonth": 12, + "dayOfYear": 71, + "month": 3, + "year": 1481 + }, + "input_form": "Date", + "state": "known" + }, + { + "additional_info": "6. 
Muḥarram(1) 888 in Islamic calendar (1483-02-23[+-2] Gregorian)", + "calendar_type": "Islamic", + "date": { + "ambiguity": 2, + "century": 15, + "dayOfMonth": 23, + "dayOfYear": 54, + "month": 2, + "year": 1483 + }, + "input_form": "Date", + "state": "known" + }, + { + "additional_info": "1. September(9) 1621 in Gregorian calendar", + "calendar_type": "Gregorian", + "date": { + "ambiguity": 0, + "century": 17, + "dayOfMonth": 1, + "dayOfYear": 244, + "month": 9, + "year": 1621 + }, + "input_form": "Date", + "state": "known" + }, + { + "additional_info": "1. Ianuarius(1) 811 - 31. December(12) 811 in Julian calendar", + "calendar_type": "Julian", + "from": { + "ambiguity": 0, + "century": 9, + "dayOfMonth": 5, + "dayOfYear": 5, + "month": 1, + "year": 811 + }, + "input_form": "Range", + "state": "known", + "until": { + "ambiguity": 0, + "century": 9, + "dayOfMonth": 4, + "dayOfYear": 4, + "month": 1, + "year": 812 + } + }, + { + "additional_info": "1. Muḥarram(1) 1000 - 29. Ḏu al-Ḥijjaẗ(12) 1024 in Islamic calendar", + "calendar_type": "Islamic", + "from": { + "ambiguity": 2, + "century": 16, + "dayOfMonth": 19, + "dayOfYear": 292, + "month": 10, + "year": 1591 + }, + "input_form": "Range", + "state": "known", + "until": { + "ambiguity": 2, + "century": 17, + "dayOfMonth": 19, + "dayOfYear": 19, + "month": 1, + "year": 1616 + } + }, + { + "additional_info": "1650-01-01 - 1699-01-01 in Gregorian calendar", + "calendar_type": "Gregorian", + "from": { + "ambiguity": 0, + "century": 17, + "dayOfMonth": 1, + "dayOfYear": 1, + "month": 1, + "year": 1650 + }, + "input_form": "Range", + "state": "known", + "until": { + "ambiguity": 0, + "century": 17, + "dayOfMonth": 1, + "dayOfYear": 1, + "month": 1, + "year": 1699 + } + } +] From 24be8e835d2131b4b92e4239559bdf032fcb4e8c Mon Sep 17 00:00:00 2001 From: Robert Casties Date: Mon, 18 Nov 2024 19:07:38 +0100 Subject: [PATCH 02/77] update ISMI sample data and README with current CIDOC-CRM model. 
--- examples/ismi/README.md | 34 ++-- examples/ismi/data/ismi-crm-date-samples.ttl | 104 ++++++++++ examples/ismi/data/ismi-om4-date-samples.json | 186 ------------------ 3 files changed, 120 insertions(+), 204 deletions(-) create mode 100644 examples/ismi/data/ismi-crm-date-samples.ttl delete mode 100644 examples/ismi/data/ismi-om4-date-samples.json diff --git a/examples/ismi/README.md b/examples/ismi/README.md index 2340bc9..e18c797 100644 --- a/examples/ismi/README.md +++ b/examples/ismi/README.md @@ -2,23 +2,21 @@ The [Islamic Scientific Manuscript Initiative project](https://ismi.mpwig-berlin.mpg.de) aims to collect information about all Islamic Manuscripts in the exact sciences from the 9th to the 19th centuries CE. -The old [ISMI database](https://gitlab.gwdg.de/MPIWG/Department-II/ismi-project) database OpenMind (OM4) stores historical dates as JSON objects with the following structure: +The new [ISMI database](https://gitlab.gwdg.de/MPIWG/Department-II/ismi-project) stores historical dates as CIDOC-CRM RDF objects with the following structure: -- `state` - - "unknown": no date - - "not checked": unparsed date in `date_in_text` - - "known": date or date range entered in specified calendar - - `calendar_type`: calendar the date was entered in - - "Julian", "Islamic", "Gregorian" - - `input_form`: date type - - "Year" - - `from`: first day, `until`: last day of year (dates in Gregorian calendar, ambiguity of +-2 days when entered in Islamic calendar) - - "Range" - - `from`: first day, `until`: last day of range (dates in Gregorian calendar, ambiguity of +-2 days when entered in Islamic calendar) - - "Date" - - `date`: given day (date in Gregorian calendar, ambiguity of +-2 days when entered in Islamic calendar) - - `additional_info`: textual note with additional information +- `crm:E52_Time-Span` + - `crm:P2_has_type`: date type + - "datetype:day" + - `crm:P82_at_some_time_within`: given day (xsd:date) + - "datetype:year" + - `crm:P82a_begin_of_the_begin`: 
first day (xsd:date), `crm:P82b_end_of_the_end`: last day of year (xsd:date) + - "datetype:range" + - `crm:P82a_begin_of_the_begin`: first day (xsd:date), `crm:P82b_end_of_the_end`: last day of range (xsd:date) + - `crm:P1_is_identified_by` + - `crm:E41_Appellation` + - `rdfs:label`: textual representation of timespan (e.g. "901 Rabīʿ I 14 (islamic)") + - `crm:P2_has_type`: calendar type (calendar the date was entered in) + - "calendartype:julian", "calendartype:islamic", "calendartype:gregorian" + - `crm:P3_has_note`: textual note with additional information -We plan to extract as much as possible of this data in the migration to the new RDF database with a CIDOC-CRM-based data model. - -A sample file with dates of each type can be found in `data/ismi-om4-date-samples.json` +A sample file with dates of each type can be found in `data/ismi-crm-date-samples.ttl` diff --git a/examples/ismi/data/ismi-crm-date-samples.ttl b/examples/ismi/data/ismi-crm-date-samples.ttl new file mode 100644 index 0000000..4c5a115 --- /dev/null +++ b/examples/ismi/data/ismi-crm-date-samples.ttl @@ -0,0 +1,104 @@ +@prefix rdfs: . +@prefix crm: . +@prefix xsd: . +# prefix for date and calendar type URIs +@prefix datetype: . +@prefix calendartype: . +# prefix for sample data +@prefix : . + +# day-precision date in islamic calendar +:date1 a crm:E52_Time-Span ; + crm:P2_has_type datetype:day ; + crm:P82_at_some_time_within "1495-12-11"^^xsd:date ; + crm:P3_has_note "day-precision date in islamic calendar" ; + crm:P1_is_identified_by :date1-label . +:date1-label a crm:E41_Appellation ; + crm:P2_has_type calendartype:islamic ; + rdfs:label "901 Rabīʿ I 14 (islamic)" . + +# year-precision date in islamic calendar +:date2 a crm:E52_Time-Span ; + crm:P2_has_type datetype:year ; + crm:P82a_begin_of_the_begin "1479-04-03"^^xsd:date ; + crm:P82b_end_of_the_end "1480-03-21"^^xsd:date ; + crm:P3_has_note "year-precision date in islamic calendar" ; + crm:P1_is_identified_by :date2-label . 
+:date2-label a crm:E41_Appellation ; + crm:P2_has_type calendartype:islamic ; + rdfs:label "884 (islamic)" . + +# range-type (century in islamic calendar) date in islamic calendar +:date3 a crm:E52_Time-Span ; + crm:P2_has_type datetype:range ; + crm:P82a_begin_of_the_begin "1494-10-11"^^xsd:date ; + crm:P82b_end_of_the_end "1591-10-18"^^xsd:date ; + crm:P3_has_note "range-type (century in islamic calendar) date in islamic calendar" ; + crm:P1_is_identified_by :date3-label . +:date3-label a crm:E41_Appellation ; + crm:P2_has_type calendartype:islamic ; + rdfs:label "900 Muḥarram 1 - 999 Ḏu al-Ḥijjaẗ 29 (islamic)" . + +# day-precision date in gregorian calendar +:date4 a crm:E52_Time-Span ; + crm:P2_has_type datetype:day ; + crm:P82_at_some_time_within "1830-02-08"^^xsd:date ; + crm:P3_has_note "day-precision date in gregorian calendar" ; + crm:P1_is_identified_by :date4-label . +:date4-label a crm:E41_Appellation ; + crm:P2_has_type calendartype:gregorian ; + rdfs:label "1830 February 8 (gregorian)" . + +# year-precision date in gregorian calendar +:date5 a crm:E52_Time-Span ; + crm:P2_has_type datetype:year ; + crm:P82a_begin_of_the_begin "1796-01-01"^^xsd:date ; + crm:P82b_end_of_the_end "1796-12-31"^^xsd:date ; + crm:P3_has_note "year-precision date in gregorian calendar" ; + crm:P1_is_identified_by :date5-label . +:date5-label a crm:E41_Appellation ; + crm:P2_has_type calendartype:gregorian ; + rdfs:label "1796 (gregorian)" . + +# range-type (century in gregorian calendar) date in gregorian calendar +:date6 a crm:E52_Time-Span ; + crm:P2_has_type datetype:range ; + crm:P82a_begin_of_the_begin "1600-01-01"^^xsd:date ; + crm:P82b_end_of_the_end "1699-12-31"^^xsd:date ; + crm:P3_has_note "range-type (century in gregorian calendar) date in gregorian calendar" ; + crm:P1_is_identified_by :date6-label . +:date6-label a crm:E41_Appellation ; + crm:P2_has_type calendartype:gregorian ; + rdfs:label "1600 January 1 - 1699 December 31 (gregorian)" . 
+ +# day-precision date in julian calendar +:date7 a crm:E52_Time-Span ; + crm:P2_has_type datetype:day ; + crm:P82_at_some_time_within "1035-06-04"^^xsd:date ; + crm:P3_has_note "day-precision date in julian calendar" ; + crm:P1_is_identified_by :date7-label . +:date7-label a crm:E41_Appellation ; + crm:P2_has_type calendartype:julian ; + rdfs:label "1035 May 29 (julian)" . + +# year-precision date in julian calendar +:date8 a crm:E52_Time-Span ; + crm:P2_has_type datetype:year ; + crm:P82a_begin_of_the_begin "1013-01-07"^^xsd:date ; + crm:P82b_end_of_the_end "1014-01-06"^^xsd:date ; + crm:P3_has_note "year-precision date in julian calendar" ; + crm:P1_is_identified_by :date8-label . +:date8-label a crm:E41_Appellation ; + crm:P2_has_type calendartype:julian ; + rdfs:label "1013 (julian)" . + +# range-type (century in julian calendar) date in julian calendar +:date9 a crm:E52_Time-Span ; + crm:P2_has_type datetype:range ; + crm:P82a_begin_of_the_begin "1200-01-08"^^xsd:date ; + crm:P82b_end_of_the_end "1300-01-07"^^xsd:date ; + crm:P3_has_note "range-type (century in julian calendar) date in julian calendar" ; + crm:P1_is_identified_by :date9-label . +:date9-label a crm:E41_Appellation ; + crm:P2_has_type calendartype:julian ; + rdfs:label "1200 January 1 - 1299 December 31 (julian)" . 
diff --git a/examples/ismi/data/ismi-om4-date-samples.json b/examples/ismi/data/ismi-om4-date-samples.json deleted file mode 100644 index 4300195..0000000 --- a/examples/ismi/data/ismi-om4-date-samples.json +++ /dev/null @@ -1,186 +0,0 @@ -[ - { - "state": "unknown" - }, - { - "date_in_text": "8th/14th century", - "state": "not checked" - }, - { - "additional_info": "year 1233 in Julian calendar", - "calendar_type": "Julian", - "from": { - "ambiguity": 0, - "century": 13, - "dayOfMonth": 8, - "dayOfYear": 8, - "month": 1, - "year": 1232 - }, - "input_form": "Year", - "state": "known", - "until": { - "ambiguity": 0, - "century": 13, - "dayOfMonth": 7, - "dayOfYear": 7, - "month": 1, - "year": 1233 - }, - "year": 1232 - }, - { - "additional_info": "year 1205 in Islamic calendar", - "calendar_type": "Islamic", - "from": { - "ambiguity": 2, - "century": 18, - "dayOfMonth": 10, - "dayOfYear": 253, - "month": 9, - "year": 1790 - }, - "input_form": "Year", - "state": "known", - "until": { - "ambiguity": 2, - "century": 18, - "dayOfMonth": 29, - "dayOfYear": 241, - "month": 8, - "year": 1791 - }, - "year": 1205 - }, - { - "additional_info": "year 1564 in Gregorian calendar", - "calendar_type": "Gregorian", - "from": { - "ambiguity": 0, - "century": 16, - "dayOfMonth": 1, - "dayOfYear": 1, - "month": 1, - "year": 1564 - }, - "input_form": "Year", - "state": "known", - "until": { - "ambiguity": 0, - "century": 16, - "dayOfMonth": 31, - "dayOfYear": 366, - "month": 12, - "year": 1564 - }, - "year": 1564 - }, - { - "additional_info": "3. Martius(3) 1481 (1481-03-03) in Julian calendar (1481-03-12 Gregorian)", - "calendar_type": "Julian", - "date": { - "ambiguity": 0, - "century": 15, - "dayOfMonth": 12, - "dayOfYear": 71, - "month": 3, - "year": 1481 - }, - "input_form": "Date", - "state": "known" - }, - { - "additional_info": "6. 
Muḥarram(1) 888 in Islamic calendar (1483-02-23[+-2] Gregorian)", - "calendar_type": "Islamic", - "date": { - "ambiguity": 2, - "century": 15, - "dayOfMonth": 23, - "dayOfYear": 54, - "month": 2, - "year": 1483 - }, - "input_form": "Date", - "state": "known" - }, - { - "additional_info": "1. September(9) 1621 in Gregorian calendar", - "calendar_type": "Gregorian", - "date": { - "ambiguity": 0, - "century": 17, - "dayOfMonth": 1, - "dayOfYear": 244, - "month": 9, - "year": 1621 - }, - "input_form": "Date", - "state": "known" - }, - { - "additional_info": "1. Ianuarius(1) 811 - 31. December(12) 811 in Julian calendar", - "calendar_type": "Julian", - "from": { - "ambiguity": 0, - "century": 9, - "dayOfMonth": 5, - "dayOfYear": 5, - "month": 1, - "year": 811 - }, - "input_form": "Range", - "state": "known", - "until": { - "ambiguity": 0, - "century": 9, - "dayOfMonth": 4, - "dayOfYear": 4, - "month": 1, - "year": 812 - } - }, - { - "additional_info": "1. Muḥarram(1) 1000 - 29. Ḏu al-Ḥijjaẗ(12) 1024 in Islamic calendar", - "calendar_type": "Islamic", - "from": { - "ambiguity": 2, - "century": 16, - "dayOfMonth": 19, - "dayOfYear": 292, - "month": 10, - "year": 1591 - }, - "input_form": "Range", - "state": "known", - "until": { - "ambiguity": 2, - "century": 17, - "dayOfMonth": 19, - "dayOfYear": 19, - "month": 1, - "year": 1616 - } - }, - { - "additional_info": "1650-01-01 - 1699-01-01 in Gregorian calendar", - "calendar_type": "Gregorian", - "from": { - "ambiguity": 0, - "century": 17, - "dayOfMonth": 1, - "dayOfYear": 1, - "month": 1, - "year": 1650 - }, - "input_form": "Range", - "state": "known", - "until": { - "ambiguity": 0, - "century": 17, - "dayOfMonth": 1, - "dayOfYear": 1, - "month": 1, - "year": 1699 - } - } -] From 0e36e45cdd809c7cd910ec59c8d7ba2845bd2f4c Mon Sep 17 00:00:00 2001 From: Robert Casties Date: Mon, 18 Nov 2024 19:12:49 +0100 Subject: [PATCH 03/77] move to new directory. 
--- examples/ismi/README.md | 22 --- examples/use-cases/ismi/README.md | 34 ++-- .../ismi/data/ismi-crm-date-samples.ttl | 0 .../ismi/data/ismi-om4-date-samples.json | 186 ------------------ 4 files changed, 16 insertions(+), 226 deletions(-) delete mode 100644 examples/ismi/README.md rename examples/{ => use-cases}/ismi/data/ismi-crm-date-samples.ttl (100%) delete mode 100644 examples/use-cases/ismi/data/ismi-om4-date-samples.json diff --git a/examples/ismi/README.md b/examples/ismi/README.md deleted file mode 100644 index e18c797..0000000 --- a/examples/ismi/README.md +++ /dev/null @@ -1,22 +0,0 @@ -# Sample data from the ISMI project database - -The [Islamic Scientific Manuscript Initiative project](https://ismi.mpwig-berlin.mpg.de) aims to collect information about all Islamic Manuscripts in the exact sciences from the 9th to the 19th centuries CE. - -The new [ISMI database](https://gitlab.gwdg.de/MPIWG/Department-II/ismi-project) stores historical dates as CIDOC-CRM RDF objects with the following structure: - -- `crm:E52_Time-Span` - - `crm:P2_has_type`: date type - - "datetype:day" - - `crm:P82_at_some_time_within`: given day (xsd:date) - - "datetype:year" - - `crm:P82a_begin_of_the_begin`: first day (xsd:date), `crm:P82b_end_of_the_end`: last day of year (xsd:date) - - "datetype:range" - - `crm:P82a_begin_of_the_begin`: first day (xsd:date), `crm:P82b_end_of_the_end`: last day of range (xsd:date) - - `crm:P1_is_identified_by` - - `crm:E41_Appellation` - - `rdfs:label`: textual representation of timespan (e.g. 
"901 Rabīʿ I 14 (islamic)") - - `crm:P2_has_type`: calendar type (calendar the date was entered in) - - "calendartype:julian", "calendartype:islamic", "calendartype:gregorian" - - `crm:P3_has_note`: textual note with additional information - -A sample file with dates of each type can be found in `data/ismi-crm-date-samples.ttl` diff --git a/examples/use-cases/ismi/README.md b/examples/use-cases/ismi/README.md index 2340bc9..e18c797 100644 --- a/examples/use-cases/ismi/README.md +++ b/examples/use-cases/ismi/README.md @@ -2,23 +2,21 @@ The [Islamic Scientific Manuscript Initiative project](https://ismi.mpwig-berlin.mpg.de) aims to collect information about all Islamic Manuscripts in the exact sciences from the 9th to the 19th centuries CE. -The old [ISMI database](https://gitlab.gwdg.de/MPIWG/Department-II/ismi-project) database OpenMind (OM4) stores historical dates as JSON objects with the following structure: +The new [ISMI database](https://gitlab.gwdg.de/MPIWG/Department-II/ismi-project) stores historical dates as CIDOC-CRM RDF objects with the following structure: -- `state` - - "unknown": no date - - "not checked": unparsed date in `date_in_text` - - "known": date or date range entered in specified calendar - - `calendar_type`: calendar the date was entered in - - "Julian", "Islamic", "Gregorian" - - `input_form`: date type - - "Year" - - `from`: first day, `until`: last day of year (dates in Gregorian calendar, ambiguity of +-2 days when entered in Islamic calendar) - - "Range" - - `from`: first day, `until`: last day of range (dates in Gregorian calendar, ambiguity of +-2 days when entered in Islamic calendar) - - "Date" - - `date`: given day (date in Gregorian calendar, ambiguity of +-2 days when entered in Islamic calendar) - - `additional_info`: textual note with additional information +- `crm:E52_Time-Span` + - `crm:P2_has_type`: date type + - "datetype:day" + - `crm:P82_at_some_time_within`: given day (xsd:date) + - "datetype:year" + - 
`crm:P82a_begin_of_the_begin`: first day (xsd:date), `crm:P82b_end_of_the_end`: last day of year (xsd:date) + - "datetype:range" + - `crm:P82a_begin_of_the_begin`: first day (xsd:date), `crm:P82b_end_of_the_end`: last day of range (xsd:date) + - `crm:P1_is_identified_by` + - `crm:E41_Appellation` + - `rdfs:label`: textual representation of timespan (e.g. "901 Rabīʿ I 14 (islamic)") + - `crm:P2_has_type`: calendar type (calendar the date was entered in) + - "calendartype:julian", "calendartype:islamic", "calendartype:gregorian" + - `crm:P3_has_note`: textual note with additional information -We plan to extract as much as possible of this data in the migration to the new RDF database with a CIDOC-CRM-based data model. - -A sample file with dates of each type can be found in `data/ismi-om4-date-samples.json` +A sample file with dates of each type can be found in `data/ismi-crm-date-samples.ttl` diff --git a/examples/ismi/data/ismi-crm-date-samples.ttl b/examples/use-cases/ismi/data/ismi-crm-date-samples.ttl similarity index 100% rename from examples/ismi/data/ismi-crm-date-samples.ttl rename to examples/use-cases/ismi/data/ismi-crm-date-samples.ttl diff --git a/examples/use-cases/ismi/data/ismi-om4-date-samples.json b/examples/use-cases/ismi/data/ismi-om4-date-samples.json deleted file mode 100644 index 4300195..0000000 --- a/examples/use-cases/ismi/data/ismi-om4-date-samples.json +++ /dev/null @@ -1,186 +0,0 @@ -[ - { - "state": "unknown" - }, - { - "date_in_text": "8th/14th century", - "state": "not checked" - }, - { - "additional_info": "year 1233 in Julian calendar", - "calendar_type": "Julian", - "from": { - "ambiguity": 0, - "century": 13, - "dayOfMonth": 8, - "dayOfYear": 8, - "month": 1, - "year": 1232 - }, - "input_form": "Year", - "state": "known", - "until": { - "ambiguity": 0, - "century": 13, - "dayOfMonth": 7, - "dayOfYear": 7, - "month": 1, - "year": 1233 - }, - "year": 1232 - }, - { - "additional_info": "year 1205 in Islamic calendar", - 
"calendar_type": "Islamic", - "from": { - "ambiguity": 2, - "century": 18, - "dayOfMonth": 10, - "dayOfYear": 253, - "month": 9, - "year": 1790 - }, - "input_form": "Year", - "state": "known", - "until": { - "ambiguity": 2, - "century": 18, - "dayOfMonth": 29, - "dayOfYear": 241, - "month": 8, - "year": 1791 - }, - "year": 1205 - }, - { - "additional_info": "year 1564 in Gregorian calendar", - "calendar_type": "Gregorian", - "from": { - "ambiguity": 0, - "century": 16, - "dayOfMonth": 1, - "dayOfYear": 1, - "month": 1, - "year": 1564 - }, - "input_form": "Year", - "state": "known", - "until": { - "ambiguity": 0, - "century": 16, - "dayOfMonth": 31, - "dayOfYear": 366, - "month": 12, - "year": 1564 - }, - "year": 1564 - }, - { - "additional_info": "3. Martius(3) 1481 (1481-03-03) in Julian calendar (1481-03-12 Gregorian)", - "calendar_type": "Julian", - "date": { - "ambiguity": 0, - "century": 15, - "dayOfMonth": 12, - "dayOfYear": 71, - "month": 3, - "year": 1481 - }, - "input_form": "Date", - "state": "known" - }, - { - "additional_info": "6. Muḥarram(1) 888 in Islamic calendar (1483-02-23[+-2] Gregorian)", - "calendar_type": "Islamic", - "date": { - "ambiguity": 2, - "century": 15, - "dayOfMonth": 23, - "dayOfYear": 54, - "month": 2, - "year": 1483 - }, - "input_form": "Date", - "state": "known" - }, - { - "additional_info": "1. September(9) 1621 in Gregorian calendar", - "calendar_type": "Gregorian", - "date": { - "ambiguity": 0, - "century": 17, - "dayOfMonth": 1, - "dayOfYear": 244, - "month": 9, - "year": 1621 - }, - "input_form": "Date", - "state": "known" - }, - { - "additional_info": "1. Ianuarius(1) 811 - 31. 
December(12) 811 in Julian calendar", - "calendar_type": "Julian", - "from": { - "ambiguity": 0, - "century": 9, - "dayOfMonth": 5, - "dayOfYear": 5, - "month": 1, - "year": 811 - }, - "input_form": "Range", - "state": "known", - "until": { - "ambiguity": 0, - "century": 9, - "dayOfMonth": 4, - "dayOfYear": 4, - "month": 1, - "year": 812 - } - }, - { - "additional_info": "1. Muḥarram(1) 1000 - 29. Ḏu al-Ḥijjaẗ(12) 1024 in Islamic calendar", - "calendar_type": "Islamic", - "from": { - "ambiguity": 2, - "century": 16, - "dayOfMonth": 19, - "dayOfYear": 292, - "month": 10, - "year": 1591 - }, - "input_form": "Range", - "state": "known", - "until": { - "ambiguity": 2, - "century": 17, - "dayOfMonth": 19, - "dayOfYear": 19, - "month": 1, - "year": 1616 - } - }, - { - "additional_info": "1650-01-01 - 1699-01-01 in Gregorian calendar", - "calendar_type": "Gregorian", - "from": { - "ambiguity": 0, - "century": 17, - "dayOfMonth": 1, - "dayOfYear": 1, - "month": 1, - "year": 1650 - }, - "input_form": "Range", - "state": "known", - "until": { - "ambiguity": 0, - "century": 17, - "dayOfMonth": 1, - "dayOfYear": 1, - "month": 1, - "year": 1699 - } - } -] From 1457bcb001bd6b8e6469573a93e22f9cad9067ce Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Mon, 18 Nov 2024 17:32:02 -0500 Subject: [PATCH 04/77] Set develop version to 0.4-dev --- src/undate/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/undate/__init__.py b/src/undate/__init__.py index 2593563..85665ff 100644 --- a/src/undate/__init__.py +++ b/src/undate/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.3.0" +__version__ = "0.4.0.dev0" from undate.date import DatePrecision from undate.undate import Undate, UndateInterval From c83ed49dbe8ca4a9370239ee7b86f6b7034b0830 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Tue, 19 Nov 2024 15:16:33 -0500 Subject: [PATCH 05/77] Fix spelling and formatting errors in changelog --- CHANGELOG.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff 
--git a/CHANGELOG.md b/CHANGELOG.md index 4003b14..260a330 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,7 @@ ## 0.3 -- Updated to use numpy `datetime64` to support a greater range of years beyond the 4-digit years supported by python's builtin ``datetime.date` +- Updated to use numpy `datetime64` to support a greater range of years beyond the 4-digit years supported by python's builtin `datetime.date` - Custom `Date` and `Timedelta` objects as shims to make numpy datetime64 and timedelta64 act more like python `datetime` objects - Renamed formatters to converters for more flexibility / scope - Support using different converters with new `format` and `parse` methods on `Undate` @@ -11,7 +11,7 @@ - Jupyter notebook demonstrating / validating EDTF support - Full support for Level 0 Date and Time Interval (no Date and Time support) - Level 1: - - Letter-prefixed cbalendar year + - Letter-prefixed calendar year - Unspecified digit from the right - Partial support for extended interval - Level 2: unspecified digit anywhere in the date @@ -24,7 +24,7 @@ ### numpy impact -Performance differences seem to be negligible, but it does increase payloud size. The virtualenv for installing version 0.2 was 14MB; when installing the newer version with numpy, the virtualenv is 46MB (the numpy folder in site packages is 31MB on its own). +Performance differences seem to be negligible, but it does increase payload size. The virtualenv for installing version 0.2 was 14MB; when installing the newer version with numpy, the virtualenv is 46MB (the numpy folder in site packages is 31MB on its own). ## 0.2 From f0ee32ce62aaed14f22b9ccd74f27e0c08bfb522 Mon Sep 17 00:00:00 2001 From: Robert Casties Date: Thu, 21 Nov 2024 13:59:58 +0100 Subject: [PATCH 06/77] add notebook with first examples of converting ismi dates to undate. 
--- examples/use-cases/ismi/ismi-dates.ipynb | 179 +++++++++++++++++++++++ examples/use-cases/ismi/requirements.txt | 2 + 2 files changed, 181 insertions(+) create mode 100644 examples/use-cases/ismi/ismi-dates.ipynb create mode 100644 examples/use-cases/ismi/requirements.txt diff --git a/examples/use-cases/ismi/ismi-dates.ipynb b/examples/use-cases/ismi/ismi-dates.ipynb new file mode 100644 index 0000000..098fbfd --- /dev/null +++ b/examples/use-cases/ismi/ismi-dates.ipynb @@ -0,0 +1,179 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0fa36628-ccf2-4977-8c4c-e0a85e2b37b6", + "metadata": {}, + "source": [ + "# Working with ISMI project dates" + ] + }, + { + "cell_type": "markdown", + "id": "ffd4b544-8957-494e-9e09-b703d68bb7df", + "metadata": {}, + "source": [ + "## Load date samples from RDF" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a8a36e7f-6057-44d1-8466-6709910d4249", + "metadata": {}, + "outputs": [], + "source": [ + "from rdflib import Graph, RDF, URIRef\n", + "from rdflib.namespace import Namespace, RDFS\n", + "from undate.undate import Undate\n", + "\n", + "# additional RDF namespaces\n", + "crmNs = Namespace('http://www.cidoc-crm.org/cidoc-crm/')\n", + "\n", + "g = Graph()\n", + "g.bind('crm', crmNs)\n", + "# load ISMI RDF sample data\n", + "g.parse('data/ismi-crm-date-samples.ttl')\n", + "# check: number of triples\n", + "len(g)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c940ca2b-b369-4511-8dc9-420bdaeb3e65", + "metadata": {}, + "outputs": [], + "source": [ + "date_uris = [u for u in g.subjects(RDF.type, crmNs['E52_Time-Span'])]\n", + "\n", + "for uri in date_uris:\n", + " q = '''SELECT ?uri ?label ?note \n", + " WHERE { \n", + " ?uri crm:P3_has_note ?note ;\n", + " crm:P1_is_identified_by / rdfs:label ?label .\n", + " } limit 10'''\n", + " res = g.query(q, initBindings={'uri': uri})\n", + " for r in res:\n", + " print(f\"uri={str(uri)} label={r.label} note={r.note}\")" + ] + }, + { + 
"cell_type": "markdown", + "id": "16361060-657f-431c-b70f-9101d550aa38", + "metadata": {}, + "source": [ + "## Convert RDF dates to Undate" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e443b974-930b-4a5d-8f21-641b4556b159", + "metadata": {}, + "outputs": [], + "source": [ + "from undate.date import DatePrecision, Date\n", + "import datetime\n", + "\n", + "uri = date_uris[1]\n", + "\n", + "#\n", + "# read date type\n", + "#\n", + "date_type = None\n", + "for date_type_uri in g.objects(uri, crmNs.P2_has_type):\n", + " for dt in ['day', 'year', 'range']:\n", + " if str(date_type_uri) == 'http://content.mpiwg-berlin.mpg.de/ns/ismi/type/date/' + dt:\n", + " date_type = dt\n", + "\n", + "if not date_type:\n", + " raise RuntimeError(f\"Unknown datetype URI {date_type_uri}\")\n", + "\n", + "#\n", + "# read label and calendar\n", + "#\n", + "date_label_uri = next(g.objects(uri, crmNs.P1_is_identified_by))\n", + "date_label = str(next(g.objects(date_label_uri, RDFS.label)))\n", + "for date_label_calendar_uri in g.objects(date_label_uri, crmNs.P2_has_type):\n", + " for ct in ['gregorian', 'julian', 'islamic']:\n", + " if str(date_label_calendar_uri) == 'http://content.mpiwg-berlin.mpg.de/ns/ismi/type/calendar/' + ct:\n", + " calendar_type = ct\n", + "\n", + "if not calendar_type:\n", + " raise RuntimeError(f\"Unknown calendar type URI {date_label_calendar_uri}\")\n", + "\n", + "#\n", + "# create undate\n", + "#\n", + "if date_type == 'day':\n", + " xsd_date = next(g.objects(uri, crmNs.P82_at_some_time_within))\n", + " date = Undate.parse(str(xsd_date), 'ISO8601')\n", + " date.precision = DatePrecision.DAY\n", + " date.label = date_label\n", + "\n", + "if date_type == 'year':\n", + " xsd_date_from = next(g.objects(uri, crmNs.P82a_begin_of_the_begin))\n", + " xsd_date_until = next(g.objects(uri, crmNs.P82b_end_of_the_end))\n", + " date_from = datetime.date.fromisoformat(str(xsd_date_from))\n", + " if calendar_type == 'gregorian':\n", + " # this should 
be fine\n", + " date = Undate(year=date_from.year)\n", + "\n", + " else:\n", + " # create day precision Undate from end date\n", + " date = Undate.parse(str(xsd_date_until), 'ISO8601')\n", + " # change earliest date\n", + " date.earliest = Date(year=date_from.year, month=date_from.month, day=date_from.day)\n", + "\n", + " # change precision and label\n", + " date.precision = DatePrecision.DAY\n", + " date.label = date_label\n", + "\n", + "if date_type == 'range':\n", + " xsd_date_from = next(g.objects(uri, crmNs.P82a_begin_of_the_begin))\n", + " xsd_date_until = next(g.objects(uri, crmNs.P82b_end_of_the_end))\n", + " # create day precision Undate from start date\n", + " date = Undate.parse(str(xsd_date_from), 'ISO8601')\n", + " # change latest date\n", + " date_until = datetime.date.fromisoformat(str(xsd_date_until))\n", + " date.latest = Date(year=date_until.year, month=date_until.month, day=date_until.day)\n", + " # change precision and label\n", + " date.precision = DatePrecision.DAY\n", + " date.label = date_label\n", + "\n", + "\n", + "print(f\"{date_label=} {date_type=} {calendar_type=} {date=}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "742ba275-7de6-461b-8891-6f06dbdd89a0", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/use-cases/ismi/requirements.txt b/examples/use-cases/ismi/requirements.txt new file mode 100644 index 0000000..0d277bc --- /dev/null +++ b/examples/use-cases/ismi/requirements.txt @@ -0,0 +1,2 @@ +jupyterlab~=4.3.1 +rdflib~=7.1.1 From 
a2dfae65d3508505ee92ac245bfda8167e31b576 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 21 Nov 2024 14:57:44 -0500 Subject: [PATCH 07/77] Preliminary hijri date parser --- .../converters/calendars/hijri/__init__.py | 0 .../converters/calendars/hijri/hijri.lark | 54 +++++++++++++++++++ .../converters/calendars/hijri/parser.py | 9 ++++ .../calendars/hijri/test_parser.py | 40 ++++++++++++++ 4 files changed, 103 insertions(+) create mode 100644 src/undate/converters/calendars/hijri/__init__.py create mode 100644 src/undate/converters/calendars/hijri/hijri.lark create mode 100644 src/undate/converters/calendars/hijri/parser.py create mode 100644 tests/test_converters/calendars/hijri/test_parser.py diff --git a/src/undate/converters/calendars/hijri/__init__.py b/src/undate/converters/calendars/hijri/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/undate/converters/calendars/hijri/hijri.lark b/src/undate/converters/calendars/hijri/hijri.lark new file mode 100644 index 0000000..56103a8 --- /dev/null +++ b/src/undate/converters/calendars/hijri/hijri.lark @@ -0,0 +1,54 @@ +%import common.WS +%ignore WS + +?date: year | month year | day month year | year month day +// NOTE: ISMI sample dates are year month day +// if we can assume years are 3 digits minimum, we can support year month day +// AND we can use + +// TODO: handle date ranges? + +// TODO: is there a minimum year we need to support? 
+// if we can assume 3+ digit years we can distinguish between days and years, +year: /\d{3,}/ + +// months +month: month_1 + | month_2 + | month_3 + | month_4 + | month_5 + | month_6 + | month_7 + | month_8 + | month_9 + | month_10 + | month_11 + | month_12 +// months have 29 or 30 days; we do not expect leading zeroes +day: /[1-9]/ | /[12][0-9]/ | /30/ + +// months, in order; from convertdate list +// with variants from Princeton Geniza Project +// support matching with and without accents +month_1: /al-Mu[ḥh]arram/ | /Mu[ḥh]arram/ +month_2: /[ṢS]afar/ +// Rabīʿ al-ʾAwwal or Rabi' I +month_3: /Rab[īi][ʿ']' (al-[`ʾ]Awwal|I)/ +// Rabīʿ ath-Thānī" or Rabi' II +month_4: /Rab[īi][ʿ'] (ath-Th[āa]n[īi]|Rabi' II)/ +// Jumādā al-ʾAwwal or Jumādā I +month_5: /Jum[āa]d[āa] (al-[ʾ`]Awwal|I)/ +// Jumādā ath-Thāniya or Jumādā II +month_6: /Jum[āa]d[āa] (ath-Th[āa]niyah|II)/ +month_7: "Rajab" +// Shaʿbān +month_8: /Sha[ʿ']b[āa]n/ +month_9: /Rama[ḍd][āa]n/ +month_10: /Shaww[āa]l/ +// Zū al-Qaʿdah or Dhu l-Qa'da +month_11: /(Z|Dh)[ūu] a?l-Qa[ʿ']dah?/ +// Zū al-Ḥijjah or Dhu l-Hijja +month_12: /(Z|Dh)[ūu] a?l-[HḤ]ijjah?/ + + diff --git a/src/undate/converters/calendars/hijri/parser.py b/src/undate/converters/calendars/hijri/parser.py new file mode 100644 index 0000000..e7e7691 --- /dev/null +++ b/src/undate/converters/calendars/hijri/parser.py @@ -0,0 +1,9 @@ +import pathlib + +from lark import Lark + +grammar_path = pathlib.Path(__file__).parent / "hijri.lark" + +with open(grammar_path) as grammar: + # NOTE: LALR parser is faster but requires assumption of 3+ digit years + hijri_parser = Lark(grammar.read(), start="date", parser="lalr") diff --git a/tests/test_converters/calendars/hijri/test_parser.py b/tests/test_converters/calendars/hijri/test_parser.py new file mode 100644 index 0000000..78f37b7 --- /dev/null +++ b/tests/test_converters/calendars/hijri/test_parser.py @@ -0,0 +1,40 @@ +import pytest +from undate.converters.calendars.hijri.parser import hijri_parser + + +# for 
now, just test that valid dates can be parsed + +testcases = [ + # year + "521", + # month + year + # - with and without accent + "al-Muḥarram 900", + "al-Muharram 900", + "Safar 581", + "Ṣafar 581", + # variant month name, with or without accent + "Muharram 900", + "Muḥarram 900", + "Jumādā al-ʾAwwal 1081", + "Jumada al-`Awwal 1081", + "Jumādā I 1081", + "Jumādā ath-Thāniyah 901", + "Jumada ath-Thaniyah 901", + "Jumādā II 981", + "Shaʿbān 900", + "Sha'ban 900", + "Ramaḍān 903", + "Ramadan 903", + "Zū al-Qaʿdah 124", + "Dhu l-Qa'da 124", + # day month year + "7 Jumādā I 1243", + "29 Muḥarram 1243", + "30 Muḥarram 1243", +] + + +@pytest.mark.parametrize("date_string", testcases) +def test_should_parse(date_string): + assert hijri_parser.parse(date_string) From ed23f6c48a2a536511010c75f4b70ac230bd8832 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 21 Nov 2024 15:10:38 -0500 Subject: [PATCH 08/77] Test all Hijri months; assume 3+ digit years and use LALR parser --- src/undate/converters/calendars/hijri/hijri.lark | 13 ++++++------- .../test_converters/calendars/hijri/test_parser.py | 11 +++++++++++ 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/src/undate/converters/calendars/hijri/hijri.lark b/src/undate/converters/calendars/hijri/hijri.lark index 56103a8..c554a52 100644 --- a/src/undate/converters/calendars/hijri/hijri.lark +++ b/src/undate/converters/calendars/hijri/hijri.lark @@ -1,10 +1,9 @@ %import common.WS %ignore WS -?date: year | month year | day month year | year month day +?date: year | month year | day month year | year month | year month day // NOTE: ISMI sample dates are year month day -// if we can assume years are 3 digits minimum, we can support year month day -// AND we can use +// if we can assume years are 3 digits minimum, we can support year month day AND we can use faster LALR parser // TODO: handle date ranges? 
@@ -28,15 +27,15 @@ month: month_1 // months have 29 or 30 days; we do not expect leading zeroes day: /[1-9]/ | /[12][0-9]/ | /30/ -// months, in order; from convertdate list +// months, in order; from convertdate list // with variants from Princeton Geniza Project // support matching with and without accents month_1: /al-Mu[ḥh]arram/ | /Mu[ḥh]arram/ month_2: /[ṢS]afar/ // Rabīʿ al-ʾAwwal or Rabi' I -month_3: /Rab[īi][ʿ']' (al-[`ʾ]Awwal|I)/ -// Rabīʿ ath-Thānī" or Rabi' II -month_4: /Rab[īi][ʿ'] (ath-Th[āa]n[īi]|Rabi' II)/ +month_3: /Rab[īi][ʿ'] (al-[`ʾ]Awwal|I)/ +// Rabīʿ ath-Thānī or Rabi' II +month_4: /Rab[īi][ʿ'] (ath-Th[āa]n[īi]|II)/ // Jumādā al-ʾAwwal or Jumādā I month_5: /Jum[āa]d[āa] (al-[ʾ`]Awwal|I)/ // Jumādā ath-Thāniya or Jumādā II diff --git a/tests/test_converters/calendars/hijri/test_parser.py b/tests/test_converters/calendars/hijri/test_parser.py index 78f37b7..dc31620 100644 --- a/tests/test_converters/calendars/hijri/test_parser.py +++ b/tests/test_converters/calendars/hijri/test_parser.py @@ -16,22 +16,33 @@ # variant month name, with or without accent "Muharram 900", "Muḥarram 900", + "Rabīʿ al-ʾAwwal 901", + "Rabi' I 901", + "Rabīʿ ath-Thānī 343", + "Rabīʿ II 343", "Jumādā al-ʾAwwal 1081", "Jumada al-`Awwal 1081", "Jumādā I 1081", "Jumādā ath-Thāniyah 901", "Jumada ath-Thaniyah 901", "Jumādā II 981", + "Rajab 942", "Shaʿbān 900", "Sha'ban 900", "Ramaḍān 903", "Ramadan 903", + "Shawwāl 1042", + "Shawwal 1042", "Zū al-Qaʿdah 124", "Dhu l-Qa'da 124", # day month year "7 Jumādā I 1243", "29 Muḥarram 1243", "30 Muḥarram 1243", + # year month, if we can assume 3+ digit years + "901 Rabīʿ I", + # year month day + "901 Rabīʿ I 12", ] From 646f739ad0f02de7d79dce01cc24a87b0ca4422e Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 21 Nov 2024 15:11:11 -0500 Subject: [PATCH 09/77] Clean up edtf parser code (remove old test case comments) --- src/undate/converters/edtf/edtf.lark | 2 +- src/undate/converters/edtf/parser.py | 41 
++-------------------------- 2 files changed, 3 insertions(+), 40 deletions(-) diff --git a/src/undate/converters/edtf/edtf.lark b/src/undate/converters/edtf/edtf.lark index e6f3a15..677fa98 100644 --- a/src/undate/converters/edtf/edtf.lark +++ b/src/undate/converters/edtf/edtf.lark @@ -16,7 +16,7 @@ date: year | year "-" month | year "-" month "-" day year: /-?\d+/ month: /(0[1-9])|(1[0-2])/ -day: /([0-2][1-9])|(3[0-1])/ +day: /([0-2][0-9])|(3[0-1])/ timeinterval: date "/" date diff --git a/src/undate/converters/edtf/parser.py b/src/undate/converters/edtf/parser.py index 6ab5139..27c2bd6 100644 --- a/src/undate/converters/edtf/parser.py +++ b/src/undate/converters/edtf/parser.py @@ -1,45 +1,8 @@ -import os.path +import pathlib from lark import Lark -grammar_path = os.path.join(os.path.dirname(__file__), "edtf.lark") +grammar_path = pathlib.Path(__file__).parent / "edtf.lark" with open(grammar_path) as grammar: edtf_parser = Lark(grammar.read(), start="edtf") - - -# testcases = [ -# "1984", -# "1984-05", -# "1984-12", -# "1001-03-30", -# "1000/2000", -# "1000-01/2000-05-01", -# # level 1 -# "Y170000002", -# "2001-21", # spring 2001 -# # qualifiers -# "1984?", -# "2004-06~", -# "2004-06-11%", -# # unspecified digits from right -# "201X", -# "20XX", -# "2004-XX", -# "1985-04-XX", -# "1985-XX-XX", -# # open ended intervals -# "1985-04-12/..", -# "1985-04/..", -# "../1985-04-12", -# "/1985-04-12", -# "1984-13", -# ] - -# for testcase in testcases: -# print(f"\n{testcase}") -# tree = edtf_parser.parse(testcase) -# print(tree.pretty()) - - -# error_cases = ["1984-13", "Y1702"] From 51850cc0d4a71903f366c5a4146f07a29ee7af95 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 21 Nov 2024 16:36:23 -0500 Subject: [PATCH 10/77] Add transformer for hijri parser to convert parsed date to undate --- pyproject.toml | 2 +- .../converters/calendars/hijri/hijri.lark | 2 +- .../converters/calendars/hijri/parser.py | 2 +- .../converters/calendars/hijri/transformer.py | 56 
+++++++++++++++++++ src/undate/undate.py | 5 +- .../calendars/hijri/test_hijri_transformer.py | 56 +++++++++++++++++++ 6 files changed, 119 insertions(+), 4 deletions(-) create mode 100644 src/undate/converters/calendars/hijri/transformer.py create mode 100644 tests/test_converters/calendars/hijri/test_hijri_transformer.py diff --git a/pyproject.toml b/pyproject.toml index 9179ca0..da206e4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ readme = "README.md" license = { text = "Apache-2" } requires-python = ">= 3.9" dynamic = ["version"] -dependencies = ["lark", "numpy"] +dependencies = ["lark[interegular]", "numpy", "convertdate"] authors = [ { name = "Rebecca Sutton Koeser" }, { name = "Cole Crawford" }, diff --git a/src/undate/converters/calendars/hijri/hijri.lark b/src/undate/converters/calendars/hijri/hijri.lark index c554a52..79b55c6 100644 --- a/src/undate/converters/calendars/hijri/hijri.lark +++ b/src/undate/converters/calendars/hijri/hijri.lark @@ -1,7 +1,7 @@ %import common.WS %ignore WS -?date: year | month year | day month year | year month | year month day +hijri_date: year | month year | day month year | year month | year month day // NOTE: ISMI sample dates are year month day // if we can assume years are 3 digits minimum, we can support year month day AND we can use faster LALR parser diff --git a/src/undate/converters/calendars/hijri/parser.py b/src/undate/converters/calendars/hijri/parser.py index e7e7691..df13a40 100644 --- a/src/undate/converters/calendars/hijri/parser.py +++ b/src/undate/converters/calendars/hijri/parser.py @@ -6,4 +6,4 @@ with open(grammar_path) as grammar: # NOTE: LALR parser is faster but requires assumption of 3+ digit years - hijri_parser = Lark(grammar.read(), start="date", parser="lalr") + hijri_parser = Lark(grammar.read(), start="hijri_date", strict=True, parser="lalr") diff --git a/src/undate/converters/calendars/hijri/transformer.py b/src/undate/converters/calendars/hijri/transformer.py new file mode 
100644 index 0000000..10f5239 --- /dev/null +++ b/src/undate/converters/calendars/hijri/transformer.py @@ -0,0 +1,56 @@ +from lark import Transformer, Tree +from convertdate import islamic + +from undate.undate import Undate, UndateInterval + + +class HijriDateTransformer(Transformer): + """Transform a Hijri date parse tree and return an Undate or + UndateInterval.""" + + def hijri_date(self, items): + parts = {} + for child in items: + if child.data in ["year", "month", "day"]: + # in each case we expect one integer value; + # anonymous tokens convert to their value and cast as int + value = int(child.children[0]) + parts[str(child.data)] = value + + # if we have a year, month, day, convert to a single undate + if len(parts.values()) == 3: + # convertdate returns a tuple of year, month day + converted_date = islamic.to_gregorian(**parts) + return Undate(*converted_date) + + # if not, convert to a date range + start, end = islamic_to_gregorian_interval(**parts) + # TODO: should we add optional date precision / interval length + # to UndateInterval?
+ return UndateInterval(Undate(*start), Undate(*end)) + + # this does nothing + # def year(self, items): + # return Tree(data="year", children=[items[0]]) + + def month(self, items): + # month has a nested tree for the rule and the value + # the name of the rule (month_1, month_2, etc) gives us the + # number of the month needed for converting the date + tree = items[0] + month_n = tree.data.split("_")[-1] + return Tree(data="month", children=[month_n]) + + +MIN_MONTH, MIN_DAY = 1, 1 +MAX_MONTH = 12 + + +def islamic_to_gregorian_interval(year, month=None, day=None): + start = (year, month or MIN_MONTH, day or MIN_DAY) + end_month = month or MAX_MONTH + # islamic calendar converter has month_length + if day is None: + day = islamic.month_length(year, end_month) + end = (year, month or MAX_MONTH, day) + return (islamic.to_gregorian(*start), islamic.to_gregorian(*end)) diff --git a/src/undate/undate.py b/src/undate/undate.py index 7df7634..f848474 100644 --- a/src/undate/undate.py +++ b/src/undate/undate.py @@ -442,11 +442,14 @@ class UndateInterval: :type label: `str` """ - # date range between two uncertain dates + # date range between two undates earliest: Union[Undate, None] latest: Union[Undate, None] label: Union[str, None] + # TODO: let's think about adding an optional precision / length /size field + # using DatePrecision + def __init__( self, earliest: Optional[Undate] = None, diff --git a/tests/test_converters/calendars/hijri/test_hijri_transformer.py b/tests/test_converters/calendars/hijri/test_hijri_transformer.py new file mode 100644 index 0000000..096bae7 --- /dev/null +++ b/tests/test_converters/calendars/hijri/test_hijri_transformer.py @@ -0,0 +1,56 @@ +import pytest +from undate.converters.calendars.hijri.parser import hijri_parser +from undate.converters.calendars.hijri.transformer import HijriDateTransformer +from undate.undate import Undate, UndateInterval +from undate.date import DatePrecision + +testcases = [ + # examples from Princeton Geniza 
Project + # date conversions checked with https://www.muqawwim.com/ + # Monday, 7 Jumādā I 1243 Hijrī (26 November, 1827 CE) + ("7 Jumādā I 1243", Undate(1827, 11, 26), DatePrecision.DAY), + ( + "Jumādā I 1243", + UndateInterval(Undate(1827, 11, 20), Undate(1827, 12, 19)), + DatePrecision.MONTH, + ), + ( + "1243", + UndateInterval(Undate(1827, 7, 25), Undate(1828, 7, 13)), + DatePrecision.YEAR, + ), + ("27 Dhū l-Qaʿda 632", Undate(1235, 8, 20), DatePrecision.DAY), + ( + "Rajab 495", + UndateInterval(Undate(1102, 4, 28), Undate(1102, 5, 27)), + DatePrecision.MONTH, + ), + ( + "441", + UndateInterval(Undate(1049, 6, 11), Undate(1050, 5, 31)), + DatePrecision.YEAR, + ), + # examples from ISMI data + ("901 Rabīʿ I 14", Undate(1495, 12, 11), DatePrecision.DAY), + ( + "884", + UndateInterval(Undate(1479, 4, 3), Undate(1480, 3, 21)), + DatePrecision.YEAR, + ), + # add when we support parsing ranges: + # 900 Muḥarram 1 - 999 Ḏu al-Ḥijjaẗ 29 : 1494-10-11 to 1591-10-18 +] + + +@pytest.mark.parametrize("date_string,expected,expected_precision", testcases) +def test_transform(date_string, expected, expected_precision): + transformer = HijriDateTransformer(visit_tokens=True) + # parse the input string, then transform to undate object + parsetree = hijri_parser.parse(date_string) + transformed_date = transformer.transform(parsetree) + assert transformed_date == expected + # currently only undates have date precision + if isinstance(transformed_date, Undate): + assert transformed_date.precision == expected_precision + # transformer doesn't have access to date string, + # label will need to be set by the converter class From 50f233185fbd5cc04c85cff4492bf767c7eeb0ed Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 21 Nov 2024 16:37:21 -0500 Subject: [PATCH 11/77] Rename test directories & files to be consistent & explicit --- .../test_hijri/test_hijri_parser.py} | 0 .../hijri => test_calendars/test_hijri}/test_hijri_transformer.py | 0 2 files changed, 0 insertions(+), 0 
deletions(-) rename tests/test_converters/{calendars/hijri/test_parser.py => test_calendars/test_hijri/test_hijri_parser.py} (100%) rename tests/test_converters/{calendars/hijri => test_calendars/test_hijri}/test_hijri_transformer.py (100%) diff --git a/tests/test_converters/calendars/hijri/test_parser.py b/tests/test_converters/test_calendars/test_hijri/test_hijri_parser.py similarity index 100% rename from tests/test_converters/calendars/hijri/test_parser.py rename to tests/test_converters/test_calendars/test_hijri/test_hijri_parser.py diff --git a/tests/test_converters/calendars/hijri/test_hijri_transformer.py b/tests/test_converters/test_calendars/test_hijri/test_hijri_transformer.py similarity index 100% rename from tests/test_converters/calendars/hijri/test_hijri_transformer.py rename to tests/test_converters/test_calendars/test_hijri/test_hijri_transformer.py From 778c67b2b98f97ac293bdbb3ff34a02c322cb72f Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 21 Nov 2024 16:58:21 -0500 Subject: [PATCH 12/77] Add an undate converter to wire in hijri date parsing capability --- src/undate/converters/calendars/__init__.py | 3 ++ .../converters/calendars/hijri/__init__.py | 3 ++ .../converters/calendars/hijri/converter.py | 48 +++++++++++++++++++ .../test_hijri/test_hijri_converter.py | 32 +++++++++++++ 4 files changed, 86 insertions(+) create mode 100644 src/undate/converters/calendars/__init__.py create mode 100644 src/undate/converters/calendars/hijri/converter.py create mode 100644 tests/test_converters/test_calendars/test_hijri/test_hijri_converter.py diff --git a/src/undate/converters/calendars/__init__.py b/src/undate/converters/calendars/__init__.py new file mode 100644 index 0000000..edc3efc --- /dev/null +++ b/src/undate/converters/calendars/__init__.py @@ -0,0 +1,3 @@ +from undate.converters.calendars.hijri import HijriDateConverter + +__all__ = ["HijriDateConverter"] diff --git a/src/undate/converters/calendars/hijri/__init__.py 
b/src/undate/converters/calendars/hijri/__init__.py index e69de29..4ac5b4b 100644 --- a/src/undate/converters/calendars/hijri/__init__.py +++ b/src/undate/converters/calendars/hijri/__init__.py @@ -0,0 +1,3 @@ +from undate.converters.calendars.hijri.converter import HijriDateConverter + +__all__ = ["HijriDateConverter"] diff --git a/src/undate/converters/calendars/hijri/converter.py b/src/undate/converters/calendars/hijri/converter.py new file mode 100644 index 0000000..0502a12 --- /dev/null +++ b/src/undate/converters/calendars/hijri/converter.py @@ -0,0 +1,48 @@ +from typing import Union + +from lark.exceptions import UnexpectedCharacters + +from undate.converters.base import BaseDateConverter +from undate.converters.calendars.hijri.parser import hijri_parser +from undate.converters.calendars.hijri.transformer import HijriDateTransformer +from undate.undate import Undate, UndateInterval + + +class HijriDateConverter(BaseDateConverter): + """ + Converter for Hijri / Islamic calendar. + + Support for parsing Hijri dates and converting to Undate and UndateInterval + objects in the Gregorian calendar. + """ + + #: converter name: Hijri + name: str = "Hijri" + calendar_name: str = "Hijrī" + + def __init__(self): + self.transformer = HijriDateTransformer() + + def parse(self, value: str) -> Union[Undate, UndateInterval]: + """ + Parse a Hijri date string and return an :class:`~undate.undate.Undate` or + :class:`~undate.undate.UndateInterval` in Gregorian calendar. 
+ The Hijri date string is preserved in the undate label + """ + if not value: + raise ValueError("Parsing empty string is not supported") + + # parse the input string, then transform to undate object + try: + # parse the string with our Hijri date parser + parsetree = hijri_parser.parse(value) + # transform the parse tree into an undate or undate interval + undate_obj = self.transformer.transform(parsetree) + # set the original date as a label, with the calendar name + undate_obj.label = f"{value} {self.calendar_name}" + return undate_obj + except UnexpectedCharacters: + raise ValueError("Could not parse '%s' as a Hijri date" % value) + + # do we need to support conversion the other direction? + # i.e., generate a Hijri date from an abitrary undate or undate interval? diff --git a/tests/test_converters/test_calendars/test_hijri/test_hijri_converter.py b/tests/test_converters/test_calendars/test_hijri/test_hijri_converter.py new file mode 100644 index 0000000..6493083 --- /dev/null +++ b/tests/test_converters/test_calendars/test_hijri/test_hijri_converter.py @@ -0,0 +1,32 @@ +import pytest +from undate.converters.calendars import HijriDateConverter +from undate.undate import Undate, UndateInterval + + +class TestHijriDateConverter: + def test_parse_(self): + # day + date_str = "7 Jumādā I 1243" + date = HijriDateConverter().parse(date_str) + assert date == Undate(1827, 11, 26) + assert date.label == f"{date_str} {HijriDateConverter.calendar_name}" + + # month + date_str = "Rajab 495" + date = HijriDateConverter().parse(date_str) + assert date == UndateInterval(Undate(1102, 4, 28), Undate(1102, 5, 27)) + assert date.label == f"{date_str} {HijriDateConverter.calendar_name}" + + # year + date_str = "441" + date = HijriDateConverter().parse(date_str) + assert date == UndateInterval(Undate(1049, 6, 11), Undate(1050, 5, 31)) + assert date.label == f"{date_str} {HijriDateConverter.calendar_name}" + + def test_parse_error(self): + # a string we can't parse should raise an 
error + with pytest.raises(ValueError): + HijriDateConverter().parse("January 2, 1991") + # empty string should also error + with pytest.raises(ValueError): + HijriDateConverter().parse("") From 99c06119695d63942c4a18a0aec1ec4f9205e682 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 21 Nov 2024 17:07:35 -0500 Subject: [PATCH 13/77] Tell mypy to ignore that convertdate code is untyped --- src/undate/converters/calendars/hijri/transformer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/undate/converters/calendars/hijri/transformer.py b/src/undate/converters/calendars/hijri/transformer.py index 10f5239..31d0992 100644 --- a/src/undate/converters/calendars/hijri/transformer.py +++ b/src/undate/converters/calendars/hijri/transformer.py @@ -1,5 +1,5 @@ from lark import Transformer, Tree -from convertdate import islamic +from convertdate import islamic # type: ignore from undate.undate import Undate, UndateInterval From 315ad7a9db81f033daaa15bf2f9fa9316d2c32ba Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 21 Nov 2024 17:32:00 -0500 Subject: [PATCH 14/77] Clean up one more date and add more possible todos --- src/undate/converters/calendars/hijri/hijri.lark | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/undate/converters/calendars/hijri/hijri.lark b/src/undate/converters/calendars/hijri/hijri.lark index 79b55c6..ae8d73e 100644 --- a/src/undate/converters/calendars/hijri/hijri.lark +++ b/src/undate/converters/calendars/hijri/hijri.lark @@ -7,6 +7,10 @@ hijri_date: year | month year | day month year | year month | year month day // TODO: handle date ranges? +// TODO: add support for qualifiers? +// PGP dates use qualifiers like "first decade of" (for beginning of month) +// "first third of", seasons (can look for more examples) + // TODO: is there a minimum year we need to support? 
// if we can assume 3+ digit years we can distinguish between days and years, year: /\d{3,}/ @@ -30,7 +34,8 @@ day: /[1-9]/ | /[12][0-9]/ | /30/ // months, in order; from convertdate list // with variants from Princeton Geniza Project // support matching with and without accents -month_1: /al-Mu[ḥh]arram/ | /Mu[ḥh]arram/ +// al-Muḥarram or Muḥarram +month_1: /(al-)?Mu[ḥh]arram/ month_2: /[ṢS]afar/ // Rabīʿ al-ʾAwwal or Rabi' I month_3: /Rab[īi][ʿ'] (al-[`ʾ]Awwal|I)/ From 18c8f259922bc578575172efab74ec38056b83bc Mon Sep 17 00:00:00 2001 From: Rebecca Sutton Koeser Date: Thu, 21 Nov 2024 17:35:29 -0500 Subject: [PATCH 15/77] Update src/undate/converters/calendars/hijri/transformer.py Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- src/undate/converters/calendars/hijri/transformer.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/undate/converters/calendars/hijri/transformer.py b/src/undate/converters/calendars/hijri/transformer.py index 31d0992..c3a059c 100644 --- a/src/undate/converters/calendars/hijri/transformer.py +++ b/src/undate/converters/calendars/hijri/transformer.py @@ -47,10 +47,20 @@ def month(self, items): def islamic_to_gregorian_interval(year, month=None, day=None): + """Convert partial Hijri date to a Gregorian date interval. 
+ + Args: + year (int): Hijri year + month (int, optional): Hijri month (1-12) + day (int, optional): Hijri day (1-30) + + Returns: + tuple: (start_date, end_date) as tuples of (year, month, day) + """ start = (year, month or MIN_MONTH, day or MIN_DAY) end_month = month or MAX_MONTH # islamic calendar converter has month_length if day is None: day = islamic.month_length(year, end_month) - end = (year, month or MAX_MONTH, day) + end = (year, end_month, day) # Use end_month instead of redundant check return (islamic.to_gregorian(*start), islamic.to_gregorian(*end)) From f3ce58b6f714726c372c770006eaea3a6d641fb6 Mon Sep 17 00:00:00 2001 From: Rebecca Sutton Koeser Date: Thu, 21 Nov 2024 17:35:53 -0500 Subject: [PATCH 16/77] Update src/undate/converters/edtf/edtf.lark Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- src/undate/converters/edtf/edtf.lark | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/undate/converters/edtf/edtf.lark b/src/undate/converters/edtf/edtf.lark index 677fa98..8587599 100644 --- a/src/undate/converters/edtf/edtf.lark +++ b/src/undate/converters/edtf/edtf.lark @@ -16,7 +16,7 @@ date: year | year "-" month | year "-" month "-" day year: /-?\d+/ month: /(0[1-9])|(1[0-2])/ -day: /([0-2][0-9])|(3[0-1])/ +day: /(0[1-9])|([12][0-9])|(3[01])/ timeinterval: date "/" date From 11cc007bca8409118b8ef25f2a900c5e7c665cfa Mon Sep 17 00:00:00 2001 From: Rebecca Sutton Koeser Date: Thu, 21 Nov 2024 17:36:19 -0500 Subject: [PATCH 17/77] Update src/undate/converters/calendars/hijri/converter.py Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- src/undate/converters/calendars/hijri/converter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/undate/converters/calendars/hijri/converter.py b/src/undate/converters/calendars/hijri/converter.py index 0502a12..e040d6e 100644 --- a/src/undate/converters/calendars/hijri/converter.py +++ 
b/src/undate/converters/calendars/hijri/converter.py @@ -41,8 +41,8 @@ def parse(self, value: str) -> Union[Undate, UndateInterval]: # set the original date as a label, with the calendar name undate_obj.label = f"{value} {self.calendar_name}" return undate_obj - except UnexpectedCharacters: - raise ValueError("Could not parse '%s' as a Hijri date" % value) + except UnexpectedCharacters as err: + raise ValueError(f"Could not parse '{value}' as a Hijri date") from err # do we need to support conversion the other direction? # i.e., generate a Hijri date from an abitrary undate or undate interval? From 2cc596eb9a130270626b0ad5f257ebd2c0832dd8 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 21 Nov 2024 17:38:09 -0500 Subject: [PATCH 18/77] Add more error cases for EDTF and Hijri parser tests --- .../test_converters/edtf/test_edtf_parser.py | 3 ++- .../test_hijri/test_hijri_parser.py | 21 +++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/tests/test_converters/edtf/test_edtf_parser.py b/tests/test_converters/edtf/test_edtf_parser.py index e9a3fdb..73d4e02 100644 --- a/tests/test_converters/edtf/test_edtf_parser.py +++ b/tests/test_converters/edtf/test_edtf_parser.py @@ -8,6 +8,7 @@ "1984-05", "1984-12", "1001-03-30", + "1901-02-20", "1000/2000", "1000-01/2000-05-01", # level 1 @@ -45,7 +46,7 @@ def test_should_parse(date_string): assert edtf_parser.parse(date_string) -error_cases = ["1984-13", "Y1702"] +error_cases = ["1984-13", "Y1702", "1984-00", "1984-01-00"] @pytest.mark.parametrize("date_string", error_cases) diff --git a/tests/test_converters/test_calendars/test_hijri/test_hijri_parser.py b/tests/test_converters/test_calendars/test_hijri/test_hijri_parser.py index dc31620..9d465ef 100644 --- a/tests/test_converters/test_calendars/test_hijri/test_hijri_parser.py +++ b/tests/test_converters/test_calendars/test_hijri/test_hijri_parser.py @@ -49,3 +49,24 @@ @pytest.mark.parametrize("date_string", testcases) def 
test_should_parse(date_string): assert hijri_parser.parse(date_string) + + +error_cases = [ + # invalid days + "0 Muḥarram 1243", + "31 Muḥarram 1243", + # month alone + "Shawwal", + # month day only + "12 Shawwal", + # invalid month + "Foo 383", + # wrong format + "2024-10-02", +] + + +@pytest.mark.parametrize("date_string", error_cases) +def test_should_error(date_string): + with pytest.raises(Exception): + hijri_parser.parse(date_string) From b994eb2b155d16dcf98122112148a6d60c379cfb Mon Sep 17 00:00:00 2001 From: Robert Casties Date: Fri, 22 Nov 2024 13:51:53 +0100 Subject: [PATCH 19/77] move notebook to separate branch --- examples/use-cases/ismi/ismi-dates.ipynb | 179 ----------------------- examples/use-cases/ismi/requirements.txt | 2 - 2 files changed, 181 deletions(-) delete mode 100644 examples/use-cases/ismi/ismi-dates.ipynb delete mode 100644 examples/use-cases/ismi/requirements.txt diff --git a/examples/use-cases/ismi/ismi-dates.ipynb b/examples/use-cases/ismi/ismi-dates.ipynb deleted file mode 100644 index 098fbfd..0000000 --- a/examples/use-cases/ismi/ismi-dates.ipynb +++ /dev/null @@ -1,179 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "0fa36628-ccf2-4977-8c4c-e0a85e2b37b6", - "metadata": {}, - "source": [ - "# Working with ISMI project dates" - ] - }, - { - "cell_type": "markdown", - "id": "ffd4b544-8957-494e-9e09-b703d68bb7df", - "metadata": {}, - "source": [ - "## Load date samples from RDF" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a8a36e7f-6057-44d1-8466-6709910d4249", - "metadata": {}, - "outputs": [], - "source": [ - "from rdflib import Graph, RDF, URIRef\n", - "from rdflib.namespace import Namespace, RDFS\n", - "from undate.undate import Undate\n", - "\n", - "# additional RDF namespaces\n", - "crmNs = Namespace('http://www.cidoc-crm.org/cidoc-crm/')\n", - "\n", - "g = Graph()\n", - "g.bind('crm', crmNs)\n", - "# load ISMI RDF sample data\n", - "g.parse('data/ismi-crm-date-samples.ttl')\n", - "# 
check: number of triples\n", - "len(g)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c940ca2b-b369-4511-8dc9-420bdaeb3e65", - "metadata": {}, - "outputs": [], - "source": [ - "date_uris = [u for u in g.subjects(RDF.type, crmNs['E52_Time-Span'])]\n", - "\n", - "for uri in date_uris:\n", - " q = '''SELECT ?uri ?label ?note \n", - " WHERE { \n", - " ?uri crm:P3_has_note ?note ;\n", - " crm:P1_is_identified_by / rdfs:label ?label .\n", - " } limit 10'''\n", - " res = g.query(q, initBindings={'uri': uri})\n", - " for r in res:\n", - " print(f\"uri={str(uri)} label={r.label} note={r.note}\")" - ] - }, - { - "cell_type": "markdown", - "id": "16361060-657f-431c-b70f-9101d550aa38", - "metadata": {}, - "source": [ - "## Convert RDF dates to Undate" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e443b974-930b-4a5d-8f21-641b4556b159", - "metadata": {}, - "outputs": [], - "source": [ - "from undate.date import DatePrecision, Date\n", - "import datetime\n", - "\n", - "uri = date_uris[1]\n", - "\n", - "#\n", - "# read date type\n", - "#\n", - "date_type = None\n", - "for date_type_uri in g.objects(uri, crmNs.P2_has_type):\n", - " for dt in ['day', 'year', 'range']:\n", - " if str(date_type_uri) == 'http://content.mpiwg-berlin.mpg.de/ns/ismi/type/date/' + dt:\n", - " date_type = dt\n", - "\n", - "if not date_type:\n", - " raise RuntimeError(f\"Unknown datetype URI {date_type_uri}\")\n", - "\n", - "#\n", - "# read label and calendar\n", - "#\n", - "date_label_uri = next(g.objects(uri, crmNs.P1_is_identified_by))\n", - "date_label = str(next(g.objects(date_label_uri, RDFS.label)))\n", - "for date_label_calendar_uri in g.objects(date_label_uri, crmNs.P2_has_type):\n", - " for ct in ['gregorian', 'julian', 'islamic']:\n", - " if str(date_label_calendar_uri) == 'http://content.mpiwg-berlin.mpg.de/ns/ismi/type/calendar/' + ct:\n", - " calendar_type = ct\n", - "\n", - "if not calendar_type:\n", - " raise RuntimeError(f\"Unknown calendar 
type URI {date_label_calendar_uri}\")\n", - "\n", - "#\n", - "# create undate\n", - "#\n", - "if date_type == 'day':\n", - " xsd_date = next(g.objects(uri, crmNs.P82_at_some_time_within))\n", - " date = Undate.parse(str(xsd_date), 'ISO8601')\n", - " date.precision = DatePrecision.DAY\n", - " date.label = date_label\n", - "\n", - "if date_type == 'year':\n", - " xsd_date_from = next(g.objects(uri, crmNs.P82a_begin_of_the_begin))\n", - " xsd_date_until = next(g.objects(uri, crmNs.P82b_end_of_the_end))\n", - " date_from = datetime.date.fromisoformat(str(xsd_date_from))\n", - " if calendar_type == 'gregorian':\n", - " # this should be fine\n", - " date = Undate(year=date_from.year)\n", - "\n", - " else:\n", - " # create day precision Undate from end date\n", - " date = Undate.parse(str(xsd_date_until), 'ISO8601')\n", - " # change earliest date\n", - " date.earliest = Date(year=date_from.year, month=date_from.month, day=date_from.day)\n", - "\n", - " # change precision and label\n", - " date.precision = DatePrecision.DAY\n", - " date.label = date_label\n", - "\n", - "if date_type == 'range':\n", - " xsd_date_from = next(g.objects(uri, crmNs.P82a_begin_of_the_begin))\n", - " xsd_date_until = next(g.objects(uri, crmNs.P82b_end_of_the_end))\n", - " # create day precision Undate from start date\n", - " date = Undate.parse(str(xsd_date_from), 'ISO8601')\n", - " # change latest date\n", - " date_until = datetime.date.fromisoformat(str(xsd_date_until))\n", - " date.latest = Date(year=date_until.year, month=date_until.month, day=date_until.day)\n", - " # change precision and label\n", - " date.precision = DatePrecision.DAY\n", - " date.label = date_label\n", - "\n", - "\n", - "print(f\"{date_label=} {date_type=} {calendar_type=} {date=}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "742ba275-7de6-461b-8891-6f06dbdd89a0", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 
(ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.5" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/examples/use-cases/ismi/requirements.txt b/examples/use-cases/ismi/requirements.txt deleted file mode 100644 index 0d277bc..0000000 --- a/examples/use-cases/ismi/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -jupyterlab~=4.3.1 -rdflib~=7.1.1 From 0aac63a9de4fa62cafef821248cf302fcf860db7 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 22 Nov 2024 15:45:31 -0500 Subject: [PATCH 20/77] Add calendar field to Undate object --- src/undate/undate.py | 21 +++++++++++++++++++++ tests/test_undate.py | 13 ++++++++++++- 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/src/undate/undate.py b/src/undate/undate.py index f848474..137c799 100644 --- a/src/undate/undate.py +++ b/src/undate/undate.py @@ -1,6 +1,7 @@ import datetime import re from calendar import monthrange +from enum import StrEnum, auto # Pre 3.10 requires Union for multiple types, e.g. Union[int, None] instead of int | None from typing import Dict, Optional, Union @@ -9,6 +10,13 @@ from undate.date import ONE_DAY, ONE_MONTH_MAX, ONE_YEAR, Date, DatePrecision, Timedelta +class Calendar(StrEnum): + """Supported calendars""" + + GREGORIAN = auto() + HIJRI = auto() + + class Undate: """object for representing uncertain, fuzzy or partially unknown dates""" @@ -25,6 +33,8 @@ class Undate: converter: BaseDateConverter #: precision of the date (day, month, year, etc.) 
precision: DatePrecision + #: the calendar this date is using; Gregorian by default + calendar: Calendar = Calendar.GREGORIAN #: known non-leap year NON_LEAP_YEAR: int = 2022 @@ -43,6 +53,7 @@ def __init__( day: Optional[Union[int, str]] = None, converter: Optional[BaseDateConverter] = None, label: Optional[str] = None, + calendar: Optional[Union[str, Calendar]] = None, ): # keep track of initial values and which values are known # TODO: add validation: if str, must be expected length @@ -58,6 +69,16 @@ def __init__( elif year: self.precision = DatePrecision.YEAR + if calendar is not None: + # if not passed as a Calendar instance, do a lookup + if not isinstance(calendar, Calendar): + # look for calendar by upper-case name + try: + calendar = Calendar[calendar.upper()] + except KeyError: + raise ValueError(f"Calendar `{calendar}` is not supported") + self.calendar = calendar + # special case: treat year = XXXX as unknown/none if year == "XXXX": year = None diff --git a/tests/test_undate.py b/tests/test_undate.py index 65360d3..fd4c169 100644 --- a/tests/test_undate.py +++ b/tests/test_undate.py @@ -3,7 +3,7 @@ import pytest from undate.date import DatePrecision, Timedelta -from undate.undate import Undate, UndateInterval +from undate.undate import Undate, UndateInterval, Calendar class TestUndate: @@ -117,6 +117,17 @@ def test_init_partially_known_day(self): # (currently causes an exception because min/max years are not leap years) # Undate(None, 2, 29) + def test_calendar(self): + assert Undate(2024).calendar == Calendar.GREGORIAN + # by name, any case + assert Undate(848, calendar="HIJRI").calendar == Calendar.HIJRI + assert Undate(848, calendar="hijri").calendar == Calendar.HIJRI + # by enum + assert Undate(848, calendar=Calendar.HIJRI).calendar == Calendar.HIJRI + # invalid + with pytest.raises(ValueError, match="Calendar `foobar` is not supported"): + Undate(848, calendar="foobar") + def test_init_invalid(self): with pytest.raises(ValueError): Undate("19xx") 
From e2444edd36cf322fb46a451a5b5e166434439ca5 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Tue, 26 Nov 2024 14:16:54 -0500 Subject: [PATCH 21/77] Partial refactor: initialize hijri dates as undate with hijri calendar --- .../converters/calendars/hijri/transformer.py | 33 ++++++---- src/undate/undate.py | 40 ++++++------ .../test_hijri/test_hijri_converter.py | 24 ++++++-- .../test_hijri/test_hijri_transformer.py | 61 +++++++++---------- 4 files changed, 93 insertions(+), 65 deletions(-) diff --git a/src/undate/converters/calendars/hijri/transformer.py b/src/undate/converters/calendars/hijri/transformer.py index c3a059c..cbb9387 100644 --- a/src/undate/converters/calendars/hijri/transformer.py +++ b/src/undate/converters/calendars/hijri/transformer.py @@ -1,7 +1,13 @@ from lark import Transformer, Tree from convertdate import islamic # type: ignore -from undate.undate import Undate, UndateInterval +from undate.undate import Undate, Calendar + + +class HijriUndate(Undate): + """Undate convience subclass; sets default calendar to Hijri.""" + + calendar = Calendar.HIJRI class HijriDateTransformer(Transformer): @@ -17,17 +23,20 @@ def hijri_date(self, items): value = int(child.children[0]) parts[str(child.data)] = value - # if we have a year, month, day, convert to a single undate - if len(parts.values()) == 3: - # convertdate returns a tuple of year, month day - converted_date = islamic.to_gregorian(**parts) - return Undate(*converted_date) - - # if not, convert to a date range - start, end = islamic_to_gregorian_interval(**parts) - # TODO: should we add optional date precision / interval length - # to UndateInteravl ? 
- return UndateInterval(Undate(*start), Undate(*end)) + print(f"*** initializing undate with {parts} and Hijri calendar") + return HijriUndate(**parts) + + # # if we have a year, month, day, convert to a single undate + # if len(parts.values()) == 3: + # # convertdate returns a tuple of year, month day + # converted_date = islamic.to_gregorian(**parts) + # return Undate(*converted_date) + + # # if not, convert to a date range + # start, end = islamic_to_gregorian_interval(**parts) + # # TODO: should we add optional date precision / interval length + # # to UndateInteravl ? + # return UndateInterval(Undate(*start), Undate(*end)) # this does nothing # def year(self, items): diff --git a/src/undate/undate.py b/src/undate/undate.py index 137c799..be5a813 100644 --- a/src/undate/undate.py +++ b/src/undate/undate.py @@ -69,20 +69,25 @@ def __init__( elif year: self.precision = DatePrecision.YEAR + self.label = label if calendar is not None: - # if not passed as a Calendar instance, do a lookup - if not isinstance(calendar, Calendar): - # look for calendar by upper-case name - try: - calendar = Calendar[calendar.upper()] - except KeyError: - raise ValueError(f"Calendar `{calendar}` is not supported") - self.calendar = calendar + self.set_calendar(calendar) # special case: treat year = XXXX as unknown/none if year == "XXXX": year = None + self.calculate_earliest_latest(year, month, day) + + if converter is None: + # import all subclass definitions; initialize the default + converter_cls = BaseDateConverter.available_converters()[ + self.DEFAULT_CONVERTER + ] + converter = converter_cls() + self.converter = converter + + def calculate_earliest_latest(self, year, month, day): if year is not None: # could we / should we use str.isnumeric here? 
try: @@ -159,15 +164,16 @@ def __init__( self.earliest = Date(min_year, min_month, min_day) self.latest = Date(max_year, max_month, max_day) - if converter is None: - # import all subclass definitions; initialize the default - converter_cls = BaseDateConverter.available_converters()[ - self.DEFAULT_CONVERTER - ] - converter = converter_cls() - self.converter = converter - - self.label = label + def set_calendar(self, calendar: Union[str, Calendar]): + if calendar is not None: + # if not passed as a Calendar instance, do a lookup + if not isinstance(calendar, Calendar): + # look for calendar by upper-case name + try: + calendar = Calendar[calendar.upper()] + except KeyError: + raise ValueError(f"Calendar `{calendar}` is not supported") + self.calendar = calendar def __str__(self) -> str: # if any portion of the date is partially known, construct diff --git a/tests/test_converters/test_calendars/test_hijri/test_hijri_converter.py b/tests/test_converters/test_calendars/test_hijri/test_hijri_converter.py index 6493083..f74d412 100644 --- a/tests/test_converters/test_calendars/test_hijri/test_hijri_converter.py +++ b/tests/test_converters/test_calendars/test_hijri/test_hijri_converter.py @@ -1,27 +1,43 @@ import pytest + from undate.converters.calendars import HijriDateConverter -from undate.undate import Undate, UndateInterval +from undate.undate import Undate, Calendar +from undate.date import DatePrecision class TestHijriDateConverter: def test_parse_(self): # day + # Monday, 7 Jumādā I 1243 Hijrī (26 November, 1827 CE); Jumada I = month 5 date_str = "7 Jumādā I 1243" date = HijriDateConverter().parse(date_str) - assert date == Undate(1827, 11, 26) + assert date == Undate(1243, 5, 7) + assert date.calendar == Calendar.HIJRI + assert date.precision == DatePrecision.DAY assert date.label == f"{date_str} {HijriDateConverter.calendar_name}" + # TODO: earliest/latest should be converted to Gregorian + # assert date.earliest == Date(1827, 11, 26) + # assert date.latest == 
Date(1827, 11, 26) # month date_str = "Rajab 495" date = HijriDateConverter().parse(date_str) - assert date == UndateInterval(Undate(1102, 4, 28), Undate(1102, 5, 27)) + assert date == Undate(495, 7) # Rajab is month 7 + assert date.calendar == Calendar.HIJRI + assert date.precision == DatePrecision.MONTH assert date.label == f"{date_str} {HijriDateConverter.calendar_name}" + # TODO: Gregorian earliest/ latest + # assert date == UndateInterval(Undate(1102, 4, 28), Undate(1102, 5, 27)) # year date_str = "441" date = HijriDateConverter().parse(date_str) - assert date == UndateInterval(Undate(1049, 6, 11), Undate(1050, 5, 31)) + assert date == Undate(441) + assert date.calendar == Calendar.HIJRI + assert date.precision == DatePrecision.YEAR assert date.label == f"{date_str} {HijriDateConverter.calendar_name}" + # TODO: Gregorian earliest/ latest + # assert date == UndateInterval(Undate(1049, 6, 11), Undate(1050, 5, 31)) def test_parse_error(self): # a string we can't parse should raise an error diff --git a/tests/test_converters/test_calendars/test_hijri/test_hijri_transformer.py b/tests/test_converters/test_calendars/test_hijri/test_hijri_transformer.py index 096bae7..0cb5aa0 100644 --- a/tests/test_converters/test_calendars/test_hijri/test_hijri_transformer.py +++ b/tests/test_converters/test_calendars/test_hijri/test_hijri_transformer.py @@ -1,42 +1,39 @@ import pytest from undate.converters.calendars.hijri.parser import hijri_parser -from undate.converters.calendars.hijri.transformer import HijriDateTransformer -from undate.undate import Undate, UndateInterval +from undate.converters.calendars.hijri.transformer import ( + HijriDateTransformer, + HijriUndate, +) +from undate.undate import Undate, Calendar from undate.date import DatePrecision + +def test_hijri_undate(): + assert HijriUndate(848).calendar == Calendar.HIJRI + + testcases = [ # examples from Princeton Geniza Project # date conversions checked with https://www.muqawwim.com/ - # Monday, 7 Jumādā I 1243 
Hijrī (26 November, 1827 CE) - ("7 Jumādā I 1243", Undate(1827, 11, 26), DatePrecision.DAY), - ( - "Jumādā I 1243", - UndateInterval(Undate(1827, 11, 20), Undate(1827, 12, 19)), - DatePrecision.MONTH, - ), - ( - "1243", - UndateInterval(Undate(1827, 7, 25), Undate(1828, 7, 13)), - DatePrecision.YEAR, - ), - ("27 Dhū l-Qaʿda 632", Undate(1235, 8, 20), DatePrecision.DAY), - ( - "Rajab 495", - UndateInterval(Undate(1102, 4, 28), Undate(1102, 5, 27)), - DatePrecision.MONTH, - ), - ( - "441", - UndateInterval(Undate(1049, 6, 11), Undate(1050, 5, 31)), - DatePrecision.YEAR, - ), - # examples from ISMI data - ("901 Rabīʿ I 14", Undate(1495, 12, 11), DatePrecision.DAY), - ( - "884", - UndateInterval(Undate(1479, 4, 3), Undate(1480, 3, 21)), - DatePrecision.YEAR, - ), + # Monday, 7 Jumādā I 1243 Hijrī (26 November, 1827 CE); Jumada I = month 5 + ("7 Jumādā I 1243", HijriUndate(1243, 5, 7), DatePrecision.DAY), + ("Jumādā I 1243", HijriUndate(1243, 5), DatePrecision.MONTH), + # Gregorian: UndateInterval(Undate(1827, 11, 20), Undate(1827, 12, 19)) + ("1243", HijriUndate(1243), DatePrecision.YEAR), + # Gregorian: UndateInterval(Undate(1827, 7, 25), Undate(1828, 7, 13)), + # Zū al-Qaʿdah / Dhu l-Qa'da = month 11 + ("27 Dhū l-Qaʿda 632", HijriUndate(632, 11, 27), DatePrecision.DAY), + # Rajab = month 7 + ("Rajab 495", HijriUndate(495, 7), DatePrecision.MONTH), + # Gregorian: UndateInterval(Undate(1102, 4, 28), Undate(1102, 5, 27)), + ("441", HijriUndate(441), DatePrecision.YEAR), + # Gregorian: UndateInterval(Undate(1049, 6, 11), Undate(1050, 5, 31)), + # examples from ISMI data (reformatted to day month year) + # Rabi 1 = month 3 + ("14 Rabīʿ I 901", HijriUndate(901, 3, 14), DatePrecision.DAY), + # Gregorian: Undate(1495, 12, 11) + ("884", HijriUndate(884), DatePrecision.YEAR), + # Gregorian: UndateInterval(Undate(1479, 4, 3), Undate(1480, 3, 21)), # add when we support parsing ranges: # 900 Muḥarram 1 - 999 Ḏu al-Ḥijjaẗ 29 : 1494-10-11 to 1591-10-18 ] From 
3aa462b2da2366c5fb5640845ecf6c09fb3eb8de Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Tue, 26 Nov 2024 16:24:10 -0500 Subject: [PATCH 22/77] Use calendar converter to get max month/day and convert to gregorian --- src/undate/converters/calendars/__init__.py | 3 +- src/undate/converters/calendars/gregorian.py | 41 ++++++++++++ .../converters/calendars/hijri/converter.py | 15 +++++ .../converters/calendars/hijri/transformer.py | 43 ++----------- src/undate/undate.py | 63 ++++++++++--------- .../test_hijri/test_hijri_converter.py | 60 ++++++++++++++---- .../test_hijri/test_hijri_transformer.py | 4 -- tests/test_undate.py | 10 +++ 8 files changed, 154 insertions(+), 85 deletions(-) create mode 100644 src/undate/converters/calendars/gregorian.py diff --git a/src/undate/converters/calendars/__init__.py b/src/undate/converters/calendars/__init__.py index edc3efc..635af21 100644 --- a/src/undate/converters/calendars/__init__.py +++ b/src/undate/converters/calendars/__init__.py @@ -1,3 +1,4 @@ +from undate.converters.calendars.gregorian import GregorianDateConverter from undate.converters.calendars.hijri import HijriDateConverter -__all__ = ["HijriDateConverter"] +__all__ = ["HijriDateConverter", "GregorianDateConverter"] diff --git a/src/undate/converters/calendars/gregorian.py b/src/undate/converters/calendars/gregorian.py new file mode 100644 index 0000000..2db1156 --- /dev/null +++ b/src/undate/converters/calendars/gregorian.py @@ -0,0 +1,41 @@ +from calendar import monthrange +from typing import Optional + +from undate.converters.base import BaseDateConverter + + +class GregorianDateConverter(BaseDateConverter): + """ + Converter class for Gregorian calendar. 
+ """ + + #: converter name: Gregorian + name: str = "Gregorian" + calendar_name: str = "Gregorian" + + #: known non-leap year + NON_LEAP_YEAR: int = 2022 + + def max_month(self, year: int) -> int: + """Maximum month for this calendar for this year""" + return 12 + + def max_day(self, year: Optional[int] = None, month: Optional[int] = None) -> int: + # if month is known, use that to calculate + if month: + # if year is known, use it; otherwise use a known non-leap year + # (only matters for February) + year = year or self.NON_LEAP_YEAR + + # Use monthrange from python builtin calendar module. + # returns first day of the month and number of days in the month + # for the specified year and month. + _, max_day = monthrange(year, month) + else: + # if year and month are unknown, return maximum possible + max_day = 31 + + return max_day + + def to_gregorian(self, year, month, day) -> tuple[int, int, int]: + return (year, month, day) diff --git a/src/undate/converters/calendars/hijri/converter.py b/src/undate/converters/calendars/hijri/converter.py index e040d6e..5c694f8 100644 --- a/src/undate/converters/calendars/hijri/converter.py +++ b/src/undate/converters/calendars/hijri/converter.py @@ -1,5 +1,6 @@ from typing import Union +from convertdate import islamic # type: ignore from lark.exceptions import UnexpectedCharacters from undate.converters.base import BaseDateConverter @@ -23,6 +24,20 @@ class HijriDateConverter(BaseDateConverter): def __init__(self): self.transformer = HijriDateTransformer() + def max_month(self, year: int) -> int: + """maximum numeric month for the specified year in this calendar""" + return 12 + + def max_day(self, year: int, month: int) -> int: + """maximum numeric day for the specified year and month in this calendar""" + return islamic.month_length(year, month) + + def to_gregorian(self, year: int, month: int, day: int) -> tuple[int, int, int]: + """Convert a Hijri date, specified by year, month, and day, + to the Gregorian equivalent 
date. Returns a tuple of year, month, day. + """ + return islamic.to_gregorian(year, month, day) + def parse(self, value: str) -> Union[Undate, UndateInterval]: """ Parse a Hijri date string and return an :class:`~undate.undate.Undate` or diff --git a/src/undate/converters/calendars/hijri/transformer.py b/src/undate/converters/calendars/hijri/transformer.py index cbb9387..b575df9 100644 --- a/src/undate/converters/calendars/hijri/transformer.py +++ b/src/undate/converters/calendars/hijri/transformer.py @@ -1,5 +1,4 @@ from lark import Transformer, Tree -from convertdate import islamic # type: ignore from undate.undate import Undate, Calendar @@ -23,22 +22,12 @@ def hijri_date(self, items): value = int(child.children[0]) parts[str(child.data)] = value - print(f"*** initializing undate with {parts} and Hijri calendar") + # initialize and return an undate with islamic year, month, day and + # islamic calendar return HijriUndate(**parts) - # # if we have a year, month, day, convert to a single undate - # if len(parts.values()) == 3: - # # convertdate returns a tuple of year, month day - # converted_date = islamic.to_gregorian(**parts) - # return Undate(*converted_date) - - # # if not, convert to a date range - # start, end = islamic_to_gregorian_interval(**parts) - # # TODO: should we add optional date precision / interval length - # # to UndateInteravl ? - # return UndateInterval(Undate(*start), Undate(*end)) - - # this does nothing + # year translation is not needed since we want a tree with name year + # this is equivalent to a no-op # def year(self, items): # return Tree(data="year", children=[items[0]]) @@ -49,27 +38,3 @@ def month(self, items): tree = items[0] month_n = tree.data.split("_")[-1] return Tree(data="month", children=[month_n]) - - -MIN_MONTH, MIN_DAY = 1, 1 -MAX_MONTH = 12 - - -def islamic_to_gregorian_interval(year, month=None, day=None): - """Convert partial Hijri date to a Gregorian date interval. 
- - Args: - year (int): Hijri year - month (int, optional): Hijri month (1-12) - day (int, optional): Hijri day (1-30) - - Returns: - tuple: (start_date, end_date) as tuples of (year, month, day) - """ - start = (year, month or MIN_MONTH, day or MIN_DAY) - end_month = month or MAX_MONTH - # islamic calendar converter has month_length - if day is None: - day = islamic.month_length(year, end_month) - end = (year, end_month, day) # Use end_month instead of redundant check - return (islamic.to_gregorian(*start), islamic.to_gregorian(*end)) diff --git a/src/undate/undate.py b/src/undate/undate.py index be5a813..8500bf8 100644 --- a/src/undate/undate.py +++ b/src/undate/undate.py @@ -1,6 +1,5 @@ import datetime import re -from calendar import monthrange from enum import StrEnum, auto # Pre 3.10 requires Union for multiple types, e.g. Union[int, None] instead of int | None @@ -16,6 +15,13 @@ class Calendar(StrEnum): GREGORIAN = auto() HIJRI = auto() + @staticmethod + def get_converter(calendar): + # calendar converter must be available with a name matching + # the title-case name of the calendar enum entry + converter_cls = BaseDateConverter.available_converters()[calendar.value.title()] + return converter_cls() + class Undate: """object for representing uncertain, fuzzy or partially unknown dates""" @@ -36,8 +42,6 @@ class Undate: #: the calendar this date is using; Gregorian by default calendar: Calendar = Calendar.GREGORIAN - #: known non-leap year - NON_LEAP_YEAR: int = 2022 # numpy datetime is stored as 64-bit integer, so min/max # depends on the time unit; assume days for now # See https://numpy.org/doc/stable/reference/arrays.datetime.html#datetime-units @@ -72,10 +76,7 @@ def __init__( self.label = label if calendar is not None: self.set_calendar(calendar) - - # special case: treat year = XXXX as unknown/none - if year == "XXXX": - year = None + self.calendar_converter = Calendar.get_converter(self.calendar) self.calculate_earliest_latest(year, month, day) @@ 
-88,6 +89,9 @@ def __init__( self.converter = converter def calculate_earliest_latest(self, year, month, day): + # special case: treat year = XXXX as unknown/none + if year == "XXXX": + year = None if year is not None: # could we / should we use str.isnumeric here? try: @@ -107,15 +111,14 @@ def calculate_earliest_latest(self, year, month, day): max_year = self.MAX_ALLOWABLE_YEAR # if month is passed in as a string but completely unknown, - # treat as none - # TODO: we should preserve this information somehow; - # difference between just a year and and an unknown month within a year - # maybe in terms of date precision ? + # treat as unknown/none (date precision already set in init) if month == "XX": month = None - min_month = 1 - max_month = 12 + min_month = 1 # is min month ever anything other than 1 ? + # get max month from the calendar, since it depends on the + # calendar and potentially the year (e.g. leap years in Hebrew Anno Mundi) + max_month = self.calendar_converter.max_month(max_year) if month is not None: try: # treat as an integer if we can @@ -128,11 +131,11 @@ def calculate_earliest_latest(self, year, month, day): min_month, max_month = self._missing_digit_minmax( str(month), min_month, max_month ) - # similar to month above — unknown day, but day-level granularity if day == "XX": day = None + # if day is numeric, use as is if isinstance(day, int) or isinstance(day, str) and day.isnumeric(): day = int(day) # update initial value - fully known day @@ -140,29 +143,31 @@ def calculate_earliest_latest(self, year, month, day): min_day = max_day = day else: # if we have no day or partial day, calculate min / max - min_day = 1 - # if we know year and month (or max month), calculate exactly - if year and month and isinstance(year, int): - _, max_day = monthrange(int(year), max_month) - elif year is None and month: - # If we don't have year and month, - # calculate based on a known non-leap year - # (better than just setting 31, but still not great) - _, 
max_day = monthrange(self.NON_LEAP_YEAR, max_month) - else: - max_day = 31 + min_day = 1 # is min day ever anything other than 1 ? + rel_year = year if year and isinstance(year, int) else None + # use month if it is an integer; otherwise use previusly determined + # max month (which may not be 12 depending if partially unknown) + rel_month = month if month and isinstance(month, int) else max_month + + max_day = self.calendar_converter.max_day(rel_year, rel_month) # if day is partially specified, narrow min/max further if day is not None: min_day, max_day = self._missing_digit_minmax(day, min_day, max_day) # TODO: special case, if we get a Feb 29 date with unknown year, - # must switch the min/max years to known leap years! + # should switch the min/max years to known leap years! # for unknowns, assume smallest possible value for earliest and # largest valid for latest - self.earliest = Date(min_year, min_month, min_day) - self.latest = Date(max_year, max_month, max_day) + # convert to Gregorian calendar so earliest/latest can always + # be used for comparison + self.earliest = Date( + *self.calendar_converter.to_gregorian(min_year, min_month, min_day) + ) + self.latest = Date( + *self.calendar_converter.to_gregorian(max_year, max_month, max_day) + ) def set_calendar(self, calendar: Union[str, Calendar]): if calendar is not None: @@ -432,6 +437,8 @@ def _missing_digit_minmax( # given a possible range, calculate min/max values for a string # with a missing digit + # TODO: test this method directly + # assuming two digit only (i.e., month or day) possible_values = [f"{n:02}" for n in range(min_val, max_val + 1)] # ensure input value has two digits diff --git a/tests/test_converters/test_calendars/test_hijri/test_hijri_converter.py b/tests/test_converters/test_calendars/test_hijri/test_hijri_converter.py index f74d412..7ae3a55 100644 --- a/tests/test_converters/test_calendars/test_hijri/test_hijri_converter.py +++ 
b/tests/test_converters/test_calendars/test_hijri/test_hijri_converter.py @@ -1,43 +1,77 @@ import pytest from undate.converters.calendars import HijriDateConverter -from undate.undate import Undate, Calendar -from undate.date import DatePrecision +from undate.converters.calendars.hijri.transformer import HijriUndate +from undate.undate import Calendar +from undate.date import DatePrecision, Date class TestHijriDateConverter: - def test_parse_(self): + def test_parse(self): # day # Monday, 7 Jumādā I 1243 Hijrī (26 November, 1827 CE); Jumada I = month 5 date_str = "7 Jumādā I 1243" date = HijriDateConverter().parse(date_str) - assert date == Undate(1243, 5, 7) + assert date == HijriUndate(1243, 5, 7) assert date.calendar == Calendar.HIJRI assert date.precision == DatePrecision.DAY assert date.label == f"{date_str} {HijriDateConverter.calendar_name}" - # TODO: earliest/latest should be converted to Gregorian - # assert date.earliest == Date(1827, 11, 26) - # assert date.latest == Date(1827, 11, 26) # month date_str = "Rajab 495" date = HijriDateConverter().parse(date_str) - assert date == Undate(495, 7) # Rajab is month 7 + assert date == HijriUndate(495, 7) # Rajab is month 7 assert date.calendar == Calendar.HIJRI assert date.precision == DatePrecision.MONTH assert date.label == f"{date_str} {HijriDateConverter.calendar_name}" - # TODO: Gregorian earliest/ latest - # assert date == UndateInterval(Undate(1102, 4, 28), Undate(1102, 5, 27)) + # Gregorian earliest/ latest + assert date.earliest == Date(1102, 4, 28) + assert date.latest == Date(1102, 5, 27) # year date_str = "441" date = HijriDateConverter().parse(date_str) - assert date == Undate(441) + assert date == HijriUndate(441) assert date.calendar == Calendar.HIJRI assert date.precision == DatePrecision.YEAR assert date.label == f"{date_str} {HijriDateConverter.calendar_name}" - # TODO: Gregorian earliest/ latest - # assert date == UndateInterval(Undate(1049, 6, 11), Undate(1050, 5, 31)) + # Gregorian earliest/ 
latest + assert date.earliest == Date(1049, 6, 11) + assert date.latest == Date(1050, 5, 31) + + def test_gregorian_earliest_latest(self): + # earliest/latest should be converted to Gregorian for comparison + + # Monday, 7 Jumādā I 1243 Hijrī (26 November, 1827 CE); Jumada I = month 5 + date = HijriUndate(1243, 5, 7) + assert date.earliest == Date(1827, 11, 26) + assert date.latest == Date(1827, 11, 26) + + # Jumādā I 1243 : 1827-11-20 to 1827-12-19 + date = HijriUndate(1243, 5) + assert date.earliest == Date(1827, 11, 20) + assert date.latest == Date(1827, 12, 19) + + # Rajab 495: 1102-04-28 to 1102-05-27 (Rajab = month 7) + date = HijriUndate(495, 7) + assert date.earliest == Date(1102, 4, 28) + assert date.latest == Date(1102, 5, 27) + + # 441 : 1049-06-11 to 1050-05-31 + date = HijriUndate(441) + assert date.earliest == Date(1049, 6, 11) + assert date.latest == Date(1050, 5, 31) + + # examples from ISMI data (reformatted to day month year) + # 14 Rabīʿ I 901 : 1495-12-11 (Rabi 1 = month 3 ) + date = HijriUndate(901, 3, 14) + assert date.earliest == Date(1495, 12, 11) + assert date.latest == Date(1495, 12, 11) + + # 884 : 1479-04-03 to 1480-03-21 + date = HijriUndate(884) + assert date.earliest == Date(1479, 4, 3) + assert date.latest == Date(1480, 3, 21) def test_parse_error(self): # a string we can't parse should raise an error diff --git a/tests/test_converters/test_calendars/test_hijri/test_hijri_transformer.py b/tests/test_converters/test_calendars/test_hijri/test_hijri_transformer.py index 0cb5aa0..7ebc117 100644 --- a/tests/test_converters/test_calendars/test_hijri/test_hijri_transformer.py +++ b/tests/test_converters/test_calendars/test_hijri/test_hijri_transformer.py @@ -18,20 +18,16 @@ def test_hijri_undate(): # Monday, 7 Jumādā I 1243 Hijrī (26 November, 1827 CE); Jumada I = month 5 ("7 Jumādā I 1243", HijriUndate(1243, 5, 7), DatePrecision.DAY), ("Jumādā I 1243", HijriUndate(1243, 5), DatePrecision.MONTH), - # Gregorian: UndateInterval(Undate(1827, 
11, 20), Undate(1827, 12, 19)) ("1243", HijriUndate(1243), DatePrecision.YEAR), # Gregorian: UndateInterval(Undate(1827, 7, 25), Undate(1828, 7, 13)), # Zū al-Qaʿdah / Dhu l-Qa'da = month 11 ("27 Dhū l-Qaʿda 632", HijriUndate(632, 11, 27), DatePrecision.DAY), # Rajab = month 7 ("Rajab 495", HijriUndate(495, 7), DatePrecision.MONTH), - # Gregorian: UndateInterval(Undate(1102, 4, 28), Undate(1102, 5, 27)), ("441", HijriUndate(441), DatePrecision.YEAR), - # Gregorian: UndateInterval(Undate(1049, 6, 11), Undate(1050, 5, 31)), # examples from ISMI data (reformatted to day month year) # Rabi 1 = month 3 ("14 Rabīʿ I 901", HijriUndate(901, 3, 14), DatePrecision.DAY), - # Gregorian: Undate(1495, 12, 11) ("884", HijriUndate(884), DatePrecision.YEAR), # Gregorian: UndateInterval(Undate(1479, 4, 3), Undate(1480, 3, 21)), # add when we support parsing ranges: diff --git a/tests/test_undate.py b/tests/test_undate.py index fd4c169..11ea550 100644 --- a/tests/test_undate.py +++ b/tests/test_undate.py @@ -2,6 +2,8 @@ from datetime import date import pytest + +from undate.converters.base import BaseDateConverter from undate.date import DatePrecision, Timedelta from undate.undate import Undate, UndateInterval, Calendar @@ -563,3 +565,11 @@ def test_duration(self): # one year set and the other not currently raises not implemented error with pytest.raises(NotImplementedError): UndateInterval(Undate(2000), Undate()).duration() + + +def test_calendar_get_converter(): + # ensure we can retrieve a calendar converter for each + # calendar named in our calendar enum + for cal in Calendar: + converter = Calendar.get_converter(cal) + assert isinstance(converter, BaseDateConverter) From fe415452b4f79ca7ba2c781ad4fa8092c39d077b Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Tue, 26 Nov 2024 16:44:48 -0500 Subject: [PATCH 23/77] Generate iso format date from native calendar date, not earliest/latest --- src/undate/converters/iso8601.py | 28 +++++++++++++++++++++------- 1 file changed, 21 
insertions(+), 7 deletions(-) diff --git a/src/undate/converters/iso8601.py b/src/undate/converters/iso8601.py index a0ecad5..09399eb 100644 --- a/src/undate/converters/iso8601.py +++ b/src/undate/converters/iso8601.py @@ -77,19 +77,33 @@ def _undate_to_string(self, undate: Undate) -> str: # TODO: may want to refactor and take advantage of the year/month/day properties # added for use in EDTF formatter code for date_portion, iso_format in self.iso_format.items(): + # is known means fully known, means guaranteed integer if undate.is_known(date_portion): # NOTE: datetime strftime for %Y for 3-digit year # results in leading zero in some environments # and not others; force year to always be 4 digits - if date_portion == "year": - date_parts.append("%04d" % undate.earliest.year) - elif date_portion == "month" and undate.earliest.month: - date_parts.append("%02d" % undate.earliest.month) - elif date_portion == "day" and undate.earliest.day: - date_parts.append("%02d" % undate.earliest.day) # type: ignore + if date_portion == "year" and undate.year: + try: + date_parts.append("%04d" % int(undate.year)) + except ValueError: + # shouldn't happen because of is_known + date_parts.append(undate.year) + elif date_portion == "month" and undate.month: + try: + date_parts.append("%02d" % int(undate.month)) + except ValueError: + # shouldn't happen because of is_known + date_parts.append(undate.month) + elif date_portion == "day" and undate.day: + try: + date_parts.append("%02d" % int(undate.day)) + except ValueError: + # shouldn't happen because of is_known + date_parts.append(undate.day) elif date_portion == "year": - # if not known but this is year, add '-' for --MM-DD unknown year format + # if year is not known, add '-' for year portion, + # to genereate --MM-DD unknown year format date_parts.append("-") # TODO: fix type error: "list[str | None]" is incompatible with "Iterable[str]" return "-".join(date_parts) # type: ignore From 3a43e6dedfcbda38ac32c98fb65c0d07def9ba05 Mon 
Sep 17 00:00:00 2001 From: rlskoeser Date: Tue, 26 Nov 2024 16:45:09 -0500 Subject: [PATCH 24/77] Include calendar name in undate repr --- src/undate/undate.py | 5 ++--- tests/test_undate.py | 5 +++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/undate/undate.py b/src/undate/undate.py index 8500bf8..042fd9a 100644 --- a/src/undate/undate.py +++ b/src/undate/undate.py @@ -202,9 +202,8 @@ def __str__(self) -> str: return self.converter.to_string(self) def __repr__(self) -> str: - if self.label: - return "" % (self.label, self) - return "" % self + label_str = f" '{self.label}'" if self.label else "" + return f"" @classmethod def parse(cls, date_string, format) -> Union["Undate", "UndateInterval"]: diff --git a/tests/test_undate.py b/tests/test_undate.py index 11ea550..37c9af9 100644 --- a/tests/test_undate.py +++ b/tests/test_undate.py @@ -27,11 +27,12 @@ def test_partially_known_str(self): # assert str(Undate(2022, day=7)) == "2022-XX-07" @ currently returns 2022-07 def test_repr(self): - assert repr(Undate(2022, 11, 7)) == "" + assert repr(Undate(2022, 11, 7)) == "" assert ( repr(Undate(2022, 11, 7, label="A Special Day")) - == "" + == "" ) + assert repr(Undate(484, calendar=Calendar.HIJRI)) == "" def test_init_str(self): assert Undate("2000").earliest.year == 2000 From 7c9ccb7d745d079f68edb4156a9076837b60bfe1 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Tue, 26 Nov 2024 17:06:50 -0500 Subject: [PATCH 25/77] Support and test comparing undates across calendars --- src/undate/undate.py | 17 +++++++-- .../test_hijri/test_hijri_converter.py | 36 ++++++++++++++++++- 2 files changed, 49 insertions(+), 4 deletions(-) diff --git a/src/undate/undate.py b/src/undate/undate.py index 042fd9a..108c56e 100644 --- a/src/undate/undate.py +++ b/src/undate/undate.py @@ -254,11 +254,15 @@ def __eq__(self, other: object) -> bool: if other is NotImplemented: return NotImplemented + # if both dates are fully known, then earliest/latest check + # is sufficient (and 
will work across calendars!) + # check for apparent equality + # - earliest/latest match and both have the same precision looks_equal = ( self.earliest == other.earliest and self.latest == other.latest - and self.initial_values == other.initial_values + and self.precision == other.precision ) # if everything looks the same, check for any unknowns in initial values # the same unknown date should NOT be considered equal @@ -268,8 +272,15 @@ def __eq__(self, other: object) -> bool: # in one format (i.e. X for missing digits). # If we support other formats, will need to normalize to common # internal format for comparison - if looks_equal and any("X" in str(val) for val in self.initial_values.values()): - return False + if looks_equal: + # if any part of either date that is known is _partially_ known, + # then these dates are not equal + if any( + [self.is_partially_known(p) for p in self.initial_values.keys()] + ) or any( + [other.is_partially_known(p) for p in other.initial_values.keys()] + ): + return False return looks_equal diff --git a/tests/test_converters/test_calendars/test_hijri/test_hijri_converter.py b/tests/test_converters/test_calendars/test_hijri/test_hijri_converter.py index 7ae3a55..098b0f3 100644 --- a/tests/test_converters/test_calendars/test_hijri/test_hijri_converter.py +++ b/tests/test_converters/test_calendars/test_hijri/test_hijri_converter.py @@ -2,7 +2,7 @@ from undate.converters.calendars import HijriDateConverter from undate.converters.calendars.hijri.transformer import HijriUndate -from undate.undate import Calendar +from undate.undate import Calendar, Undate from undate.date import DatePrecision, Date @@ -80,3 +80,37 @@ def test_parse_error(self): # empty string should also error with pytest.raises(ValueError): HijriDateConverter().parse("") + + def test_compare_across_calendars(self): + # only day-precision dates can be exactly equal across calendars + + # 7 Jumādā I 1243 Hijrī : 26 November, 1827; Jumada I = month 5 + assert 
HijriUndate(1243, 5, 7) == Undate(1827, 11, 26) + # 14 Rabīʿ I 901 : 1495-12-11 (Rabi 1 = month 3 ) + assert HijriUndate(901, 3, 14) == Undate(1495, 12, 11) + + # greater than / less than + assert HijriUndate(901) < Undate(1500) + assert HijriUndate(901) > Undate(1450) + # Jumādā I 1243 : 1827-11-20 to 1827-12-19 + assert HijriUndate(1243, 5) > Undate(1827, 10) + assert HijriUndate(1243, 5) < Undate(1828, 1) + + # 7 Jumādā I 1243 Hijrī : 26 November, 1827, so it falls + # within (or is contained by) November 1827 + assert HijriUndate(1243, 5, 7) in Undate(1827, 11) + assert HijriUndate(1243, 5, 7) not in Undate(1827, 10) + + # sorting + sorted_dates = sorted( + [ + HijriUndate(884), # 1479 to 1480 Gregorian + HijriUndate(441), # 1049 to 1050 Gregorian + HijriUndate(901), # 1495 to 1495 Gregorian + Undate(1995), + Undate(33), + Undate(1350), + ] + ) + expected_gregorian_years = [33, 1049, 1350, 1479, 1495, 1995] + assert [d.earliest.year for d in sorted_dates] == expected_gregorian_years From b6b6376ad33b14bcbced8f08dfec6b237d3ace9d Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Tue, 26 Nov 2024 17:19:39 -0500 Subject: [PATCH 26/77] Work around StrEnum not being in python until 3.11 --- pyproject.toml | 2 +- src/undate/undate.py | 10 +++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index da206e4..f1ad9a7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ readme = "README.md" license = { text = "Apache-2" } requires-python = ">= 3.9" dynamic = ["version"] -dependencies = ["lark[interegular]", "numpy", "convertdate"] +dependencies = ["lark[interegular]", "numpy", "convertdate", "strenum; python_version < '3.11'"] authors = [ { name = "Rebecca Sutton Koeser" }, { name = "Cole Crawford" }, diff --git a/src/undate/undate.py b/src/undate/undate.py index 108c56e..8a10073 100644 --- a/src/undate/undate.py +++ b/src/undate/undate.py @@ -1,6 +1,14 @@ import datetime import re -from enum import StrEnum, 
auto + +from enum import auto + +try: + # StrEnum was only added in python 3.11 + from enum import StrEnum +except ImportError: + # for python 3.10 or earlier, use third-party package + from strenum import StrEnum # type: ignore # Pre 3.10 requires Union for multiple types, e.g. Union[int, None] instead of int | None from typing import Dict, Optional, Union From e91b7ba00b8d47d0977f02921c5f091d5c94288c Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Tue, 26 Nov 2024 17:25:57 -0500 Subject: [PATCH 27/77] Allow any Hijri year (drop 3+ digit year constraint and year-month-day) --- src/undate/converters/calendars/hijri/hijri.lark | 10 ++++------ src/undate/converters/calendars/hijri/parser.py | 4 ++-- .../test_calendars/test_hijri/test_hijri_parser.py | 12 ++++++++---- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/src/undate/converters/calendars/hijri/hijri.lark b/src/undate/converters/calendars/hijri/hijri.lark index ae8d73e..4e6ccc7 100644 --- a/src/undate/converters/calendars/hijri/hijri.lark +++ b/src/undate/converters/calendars/hijri/hijri.lark @@ -1,9 +1,9 @@ %import common.WS %ignore WS -hijri_date: year | month year | day month year | year month | year month day -// NOTE: ISMI sample dates are year month day -// if we can assume years are 3 digits minimum, we can support year month day AND we can use faster LALR parser +// only support day month year format for now +// parser requires numeric day and year to be distinguished based on order +hijri_date: day month year | month year | year // TODO: handle date ranges? @@ -11,9 +11,7 @@ hijri_date: year | month year | day month year | year month | year month day // PGP dates use qualifiers like "first decade of" (for beginning of month) // "first third of", seasons (can look for more examples) -// TODO: is there a minimum year we need to support? 
-// if we can assume 3+ digit years we can distinguish between days and years, -year: /\d{3,}/ +year: /\d+/ // months month: month_1 diff --git a/src/undate/converters/calendars/hijri/parser.py b/src/undate/converters/calendars/hijri/parser.py index df13a40..273cdf9 100644 --- a/src/undate/converters/calendars/hijri/parser.py +++ b/src/undate/converters/calendars/hijri/parser.py @@ -5,5 +5,5 @@ grammar_path = pathlib.Path(__file__).parent / "hijri.lark" with open(grammar_path) as grammar: - # NOTE: LALR parser is faster but requires assumption of 3+ digit years - hijri_parser = Lark(grammar.read(), start="hijri_date", strict=True, parser="lalr") + # NOTE: LALR parser is faster but can't be used to ambiguity between years and dates + hijri_parser = Lark(grammar.read(), start="hijri_date", strict=True) diff --git a/tests/test_converters/test_calendars/test_hijri/test_hijri_parser.py b/tests/test_converters/test_calendars/test_hijri/test_hijri_parser.py index 9d465ef..6b9c828 100644 --- a/tests/test_converters/test_calendars/test_hijri/test_hijri_parser.py +++ b/tests/test_converters/test_calendars/test_hijri/test_hijri_parser.py @@ -39,10 +39,11 @@ "7 Jumādā I 1243", "29 Muḥarram 1243", "30 Muḥarram 1243", - # year month, if we can assume 3+ digit years - "901 Rabīʿ I", - # year month day - "901 Rabīʿ I 12", + "Rabīʿ I 901", + "12 Rabīʿ I 901", + # two and 1 digit years + "12 Rabīʿ I 90", + "12 Rabīʿ I 9", ] @@ -63,6 +64,9 @@ def test_should_parse(date_string): "Foo 383", # wrong format "2024-10-02", + # year month day not supported + "901 Rabīʿ I", + "901 Rabīʿ I 12", ] From 6c6f09a35cb355f0b22fd7bd8b20d4293bcbecb4 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Tue, 26 Nov 2024 17:35:44 -0500 Subject: [PATCH 28/77] Confirm hijri dates + partially unknown date behavior --- .../test_hijri/test_hijri_converter.py | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/tests/test_converters/test_calendars/test_hijri/test_hijri_converter.py 
b/tests/test_converters/test_calendars/test_hijri/test_hijri_converter.py index 098b0f3..6541586 100644 --- a/tests/test_converters/test_calendars/test_hijri/test_hijri_converter.py +++ b/tests/test_converters/test_calendars/test_hijri/test_hijri_converter.py @@ -81,6 +81,44 @@ def test_parse_error(self): with pytest.raises(ValueError): HijriDateConverter().parse("") + def test_partially_known(self): + # hijri dates get existing partially unknown behavior + unknown_month = HijriUndate(1243, "XX") + assert unknown_month.precision == DatePrecision.MONTH + assert unknown_month.earliest == Date( + *HijriDateConverter().to_gregorian(1243, 1, 1) + ) + assert unknown_month.latest == Date( + *HijriDateConverter().to_gregorian(1243, 12, 30) + ) + + partially_unknown_month = HijriUndate(1243, "1X") + assert partially_unknown_month.precision == DatePrecision.MONTH + assert partially_unknown_month.earliest == Date( + *HijriDateConverter().to_gregorian(1243, 10, 1) + ) + assert partially_unknown_month.latest == Date( + *HijriDateConverter().to_gregorian(1243, 12, 30) + ) + + unknown_day = HijriUndate(1243, 2, "XX") + assert unknown_day.precision == DatePrecision.DAY + assert unknown_day.earliest == Date( + *HijriDateConverter().to_gregorian(1243, 2, 1) + ) + # second month has 29 days + assert unknown_day.latest == Date( + *HijriDateConverter().to_gregorian(1243, 2, 29) + ) + partially_unknown_day = HijriUndate(1243, 2, "2X") + assert partially_unknown_day.precision == DatePrecision.DAY + assert partially_unknown_day.earliest == Date( + *HijriDateConverter().to_gregorian(1243, 2, 20) + ) + assert partially_unknown_day.latest == Date( + *HijriDateConverter().to_gregorian(1243, 2, 29) + ) + def test_compare_across_calendars(self): # only day-precision dates can be exactly equal across calendars From 5cc19fdabefc8b32a57864d169ba79c705dcbbd4 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Tue, 26 Nov 2024 18:07:25 -0500 Subject: [PATCH 29/77] Add calendar converter base class and 
document how to add calendars --- src/undate/converters/base.py | 66 +++++++++++++++++-- src/undate/converters/calendars/gregorian.py | 14 ++-- .../converters/calendars/hijri/converter.py | 4 +- tests/test_converters/test_base.py | 4 +- tests/test_undate.py | 5 +- 5 files changed, 77 insertions(+), 16 deletions(-) diff --git a/src/undate/converters/base.py b/src/undate/converters/base.py index 02cf820..ecdbf9b 100644 --- a/src/undate/converters/base.py +++ b/src/undate/converters/base.py @@ -1,10 +1,11 @@ """ -:class:`undate.converters.BaseDateConverter` provides a base class for +:class:`~undate.converters.BaseDateConverter` provides a base class for implementing date converters, which can provide support for -parsing and generating dates in different formats and also converting -dates between different calendars. +parsing and generating dates in different formats. +The converter subclass :class:`undate.converters.BaseCalendarConverter` +provides additional functionaly needed for calendar conversion. -To add support for a new date format or calendar conversion: +To add support for a new date converter: - Create a new file under ``undate/converters/`` - For converters with sufficient complexity, you may want to create a submodule; @@ -18,6 +19,25 @@ The new subclass should be loaded automatically and included in the converters returned by :meth:`BaseDateConverter.available_converters` +To add support for a new calendar converter: + +- Create a new file under ``undate/converters/calendars/`` + - For converters with sufficient complexity, you may want to create a submodule; + see ``undate.converters.calendars.hijri`` for an example. +- Extend ``BaseCalendarConverter`` and implement ``parse`` and ``to_string`` + formatter methods as desired/appropriate for your converter as well as the + additional methods for ``max_month``, ``max_day``, and convertion ``to_gregorian`` + calendar. 
+- Add unit tests for the new calendar logic under ``tests/test_converters/calendars/`` +- Add the new calendar to the ``Calendar`` enum of supported calendars in + ``undate/undate.py`` and confirm that the `get_converter` method loads your + calendar converter correctly (an existing unit test should cover this). +- Consider creating a notebook to demonstrate the use of the calendar + converter. + +Calendar converter subclasses are also automatically loaded and included +in the list of available converters. + ------------------- """ @@ -90,6 +110,42 @@ def available_converters(cls) -> Dict[str, Type["BaseDateConverter"]]: """ Dictionary of available converters keyed on name. """ + return {c.name: c for c in cls.subclasses()} # type: ignore + + @classmethod + def subclasses(cls) -> list[Type["BaseDateConverter"]]: + """ + List of available converters classes. Includes calendar convert + subclasses. + """ # ensure undate converters are imported cls.import_converters() - return {c.name: c for c in cls.__subclasses__()} # type: ignore + + # find all direct subclasses, excluding base calendar converter + subclasses = cls.__subclasses__() + subclasses.remove(BaseCalendarConverter) + # add all subclasses of calendar converter base class + subclasses.extend(BaseCalendarConverter.__subclasses__()) + return subclasses + + +class BaseCalendarConverter(BaseDateConverter): + """Base class for calendar converters, with additional methods required + for calendars.""" + + #: Converter name. Subclasses must define a unique name. 
+ name: str = "Base Calendar Converter" + + def max_month(self, year: int) -> int: + """Maximum month for this calendar for this year""" + raise NotImplementedError + + def max_day(self, year: int, month: int) -> int: + """maximum numeric day for the specified year and month in this calendar""" + raise NotImplementedError + + def to_gregorian(self, year, month, day) -> tuple[int, int, int]: + """Convert a date for this calendar specified by numeric year, month, and day, + into the Gregorian equivalent date. Should return a tuple of year, month, day. + """ + raise NotImplementedError diff --git a/src/undate/converters/calendars/gregorian.py b/src/undate/converters/calendars/gregorian.py index 2db1156..f794329 100644 --- a/src/undate/converters/calendars/gregorian.py +++ b/src/undate/converters/calendars/gregorian.py @@ -1,12 +1,11 @@ from calendar import monthrange -from typing import Optional -from undate.converters.base import BaseDateConverter +from undate.converters.base import BaseCalendarConverter -class GregorianDateConverter(BaseDateConverter): +class GregorianDateConverter(BaseCalendarConverter): """ - Converter class for Gregorian calendar. + Calendar onverter class for Gregorian calendar. """ #: converter name: Gregorian @@ -20,7 +19,8 @@ def max_month(self, year: int) -> int: """Maximum month for this calendar for this year""" return 12 - def max_day(self, year: Optional[int] = None, month: Optional[int] = None) -> int: + def max_day(self, year: int, month: int) -> int: + """maximum numeric day for the specified year and month in this calendar""" # if month is known, use that to calculate if month: # if year is known, use it; otherwise use a known non-leap year @@ -38,4 +38,8 @@ def max_day(self, year: Optional[int] = None, month: Optional[int] = None) -> in return max_day def to_gregorian(self, year, month, day) -> tuple[int, int, int]: + """Convert a Hijri date, specified by year, month, and day, + to the Gregorian equivalent date. 
Returns a tuple of year, month, day. + """ + return (year, month, day) diff --git a/src/undate/converters/calendars/hijri/converter.py b/src/undate/converters/calendars/hijri/converter.py index 5c694f8..9a8ad72 100644 --- a/src/undate/converters/calendars/hijri/converter.py +++ b/src/undate/converters/calendars/hijri/converter.py @@ -3,13 +3,13 @@ from convertdate import islamic # type: ignore from lark.exceptions import UnexpectedCharacters -from undate.converters.base import BaseDateConverter +from undate.converters.base import BaseCalendarConverter from undate.converters.calendars.hijri.parser import hijri_parser from undate.converters.calendars.hijri.transformer import HijriDateTransformer from undate.undate import Undate, UndateInterval -class HijriDateConverter(BaseDateConverter): +class HijriDateConverter(BaseCalendarConverter): """ Converter for Hijri / Islamic calendar. diff --git a/tests/test_converters/test_base.py b/tests/test_converters/test_base.py index 60d5d1e..1426f13 100644 --- a/tests/test_converters/test_base.py +++ b/tests/test_converters/test_base.py @@ -18,7 +18,7 @@ def test_available_converters(self): def test_converters_are_unique(self): assert len(BaseDateConverter.available_converters()) == len( - BaseDateConverter.__subclasses__() + BaseDateConverter.subclasses() ), "Formatter names have to be unique." 
def test_parse_not_implemented(self): @@ -60,5 +60,5 @@ class ISO8601DateFormat2(BaseDateConverter): name = "ISO8601" # duplicates existing formatter assert len(BaseDateConverter.available_converters()) != len( - BaseDateConverter.__subclasses__() + BaseDateConverter.subclasses() ) diff --git a/tests/test_undate.py b/tests/test_undate.py index 37c9af9..ecf0777 100644 --- a/tests/test_undate.py +++ b/tests/test_undate.py @@ -3,7 +3,7 @@ import pytest -from undate.converters.base import BaseDateConverter +from undate.converters.base import BaseCalendarConverter from undate.date import DatePrecision, Timedelta from undate.undate import Undate, UndateInterval, Calendar @@ -573,4 +573,5 @@ def test_calendar_get_converter(): # calendar named in our calendar enum for cal in Calendar: converter = Calendar.get_converter(cal) - assert isinstance(converter, BaseDateConverter) + assert isinstance(converter, BaseCalendarConverter) + assert converter.name.lower() == cal.name.lower() From d26574c94c76d0f57117aaff86decfc742b12021 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Tue, 26 Nov 2024 19:21:24 -0500 Subject: [PATCH 30/77] Implementing Hebrew Anno Mundi calendar converter based on Hijri --- src/undate/converters/base.py | 11 +- src/undate/converters/calendars/gregorian.py | 4 - .../converters/calendars/hebrew/__init__.py | 3 + .../converters/calendars/hebrew/converter.py | 71 +++++++++ .../converters/calendars/hebrew/hebrew.lark | 55 +++++++ .../converters/calendars/hebrew/parser.py | 9 ++ .../calendars/hebrew/transformer.py | 40 +++++ .../converters/calendars/hijri/__init__.py | 3 +- .../converters/calendars/hijri/converter.py | 8 +- src/undate/undate.py | 14 +- .../test_hebrew/test_hebrew_converter.py | 142 ++++++++++++++++++ .../test_hebrew/test_hebrew_parser.py | 63 ++++++++ .../test_hebrew/test_hebrew_transformer.py | 43 ++++++ 13 files changed, 447 insertions(+), 19 deletions(-) create mode 100644 src/undate/converters/calendars/hebrew/__init__.py create mode 100644 
src/undate/converters/calendars/hebrew/converter.py create mode 100644 src/undate/converters/calendars/hebrew/hebrew.lark create mode 100644 src/undate/converters/calendars/hebrew/parser.py create mode 100644 src/undate/converters/calendars/hebrew/transformer.py create mode 100644 tests/test_converters/test_calendars/test_hebrew/test_hebrew_converter.py create mode 100644 tests/test_converters/test_calendars/test_hebrew/test_hebrew_parser.py create mode 100644 tests/test_converters/test_calendars/test_hebrew/test_hebrew_transformer.py diff --git a/src/undate/converters/base.py b/src/undate/converters/base.py index ecdbf9b..14bff87 100644 --- a/src/undate/converters/base.py +++ b/src/undate/converters/base.py @@ -28,6 +28,7 @@ formatter methods as desired/appropriate for your converter as well as the additional methods for ``max_month``, ``max_day``, and convertion ``to_gregorian`` calendar. +- Import your calendar in ``undate/converters/calendars/__init__.py`` and include in `__all__`` - Add unit tests for the new calendar logic under ``tests/test_converters/calendars/`` - Add the new calendar to the ``Calendar`` enum of supported calendars in ``undate/undate.py`` and confirm that the `get_converter` method loads your @@ -136,9 +137,13 @@ class BaseCalendarConverter(BaseDateConverter): #: Converter name. Subclasses must define a unique name. name: str = "Base Calendar Converter" - def max_month(self, year: int) -> int: - """Maximum month for this calendar for this year""" - raise NotImplementedError + def min_month(self) -> int: + """First month for this calendar. Defaults to 1.""" + return 1 + + def max_month(self) -> int: + """Last month for this calendar. 
Defaults to 12.""" + return 12 def max_day(self, year: int, month: int) -> int: """maximum numeric day for the specified year and month in this calendar""" diff --git a/src/undate/converters/calendars/gregorian.py b/src/undate/converters/calendars/gregorian.py index f794329..9a3e2a9 100644 --- a/src/undate/converters/calendars/gregorian.py +++ b/src/undate/converters/calendars/gregorian.py @@ -15,10 +15,6 @@ class GregorianDateConverter(BaseCalendarConverter): #: known non-leap year NON_LEAP_YEAR: int = 2022 - def max_month(self, year: int) -> int: - """Maximum month for this calendar for this year""" - return 12 - def max_day(self, year: int, month: int) -> int: """maximum numeric day for the specified year and month in this calendar""" # if month is known, use that to calculate diff --git a/src/undate/converters/calendars/hebrew/__init__.py b/src/undate/converters/calendars/hebrew/__init__.py new file mode 100644 index 0000000..4ac5b4b --- /dev/null +++ b/src/undate/converters/calendars/hebrew/__init__.py @@ -0,0 +1,3 @@ +from undate.converters.calendars.hijri.converter import HijriDateConverter + +__all__ = ["HijriDateConverter"] diff --git a/src/undate/converters/calendars/hebrew/converter.py b/src/undate/converters/calendars/hebrew/converter.py new file mode 100644 index 0000000..7d83dc7 --- /dev/null +++ b/src/undate/converters/calendars/hebrew/converter.py @@ -0,0 +1,71 @@ +from typing import Union + +from convertdate import hebrew # type: ignore +from lark.exceptions import UnexpectedCharacters + +from undate.converters.base import BaseCalendarConverter +from undate.converters.calendars.hebrew.parser import hebrew_parser +from undate.converters.calendars.hebrew.transformer import HebrewDateTransformer +from undate.undate import Undate, UndateInterval + + +class HebrewDateConverter(BaseCalendarConverter): + """ + Converter for Hebrew Anno Mundicalendar. 
+ + Support for parsing Anno Mundi dates and converting to Undate and UndateInterval + objects in the Gregorian calendar. + """ + + #: converter name: Hebrew + name: str = "Hebrew" + calendar_name: str = "Anno Mundi" + + def __init__(self): + self.transformer = HebrewDateTransformer() + + def min_month(self) -> int: + """first numeric month for the specified year in this calendar""" + # hebrew calendar civil year starts in Tishri + return hebrew.TISHRI + + def max_month(self) -> int: + """last numeric month for the specified year in this calendar""" + # hebrew calendar civil year starts in Tishri + # Elul is the month before Tishri + return hebrew.ELUL + + def max_day(self, year: int, month: int) -> int: + """maximum numeric day for the specified year and month in this calendar""" + # NOTE: unreleased v2.4.1 of convertdate standardizes month_days to month_length + return hebrew.month_days(year, month) + + def to_gregorian(self, year: int, month: int, day: int) -> tuple[int, int, int]: + """Convert a Hebrew date, specified by year, month, and day, + to the Gregorian equivalent date. Returns a tuple of year, month, day. + """ + return hebrew.to_gregorian(year, month, day) + + def parse(self, value: str) -> Union[Undate, UndateInterval]: + """ + Parse a Hebrew date string and return an :class:`~undate.undate.Undate` or + :class:`~undate.undate.UndateInterval`. + The Hebrew date string is preserved in the undate label. 
+ """ + if not value: + raise ValueError("Parsing empty string is not supported") + + # parse the input string, then transform to undate object + try: + # parse the string with our Hebrew date parser + parsetree = hebrew_parser.parse(value) + # transform the parse tree into an undate or undate interval + undate_obj = self.transformer.transform(parsetree) + # set the original date as a label, with the calendar name + undate_obj.label = f"{value} {self.calendar_name}" + return undate_obj + except UnexpectedCharacters as err: + raise ValueError(f"Could not parse '{value}' as a Hebrew date") from err + + # do we need to support conversion the other direction? + # i.e., generate a Hebrew date from an abitrary undate or undate interval? diff --git a/src/undate/converters/calendars/hebrew/hebrew.lark b/src/undate/converters/calendars/hebrew/hebrew.lark new file mode 100644 index 0000000..64e527b --- /dev/null +++ b/src/undate/converters/calendars/hebrew/hebrew.lark @@ -0,0 +1,55 @@ +%import common.WS +%ignore WS + +// only support day month year format for now +// parser requires numeric day and year to be distinguished based on order +hebrew_date: day month year | month year | year + +// TODO: handle date ranges? + +// TODO: add support for qualifiers? 
+// PGP dates use qualifiers like "first decade of" (for beginning of month) +// "first third of", seasons (can look for more examples) + +year: /\d+/ + +// months +month: month_1 + | month_2 + | month_3 + | month_4 + | month_5 + | month_6 + | month_7 + | month_8 + | month_9 + | month_10 + | month_11 + | month_12 + | month_13 +// months have 29 or 30 days; we do not expect leading zeroes +day: /[1-9]/ | /[12][0-9]/ | /30/ + +// months, in order; from convertdate list +// with variants from Princeton Geniza Project +// support matching with and without accents +month_1: "Nisan" +// Iyar or Iyyar +month_2: /Iyy?ar/ +month_3: "Sivan" +month_4: "Tammuz" +month_5: "Av" +month_6: "Elul" +// Tishrei or Tishri +month_7: /Tishre?i/ +month_8: "Heshvan" +month_9: "Kislev" +// Tevet or Teveth +month_10: /[ṬT]eveth?/ +month_11: "Shevat" +// Adar I or Adar +month_12: /Adar( I)?/ +// Adar II or Adar Bet +month_13: /Adar (II|Bet)/ + + diff --git a/src/undate/converters/calendars/hebrew/parser.py b/src/undate/converters/calendars/hebrew/parser.py new file mode 100644 index 0000000..5654f60 --- /dev/null +++ b/src/undate/converters/calendars/hebrew/parser.py @@ -0,0 +1,9 @@ +import pathlib + +from lark import Lark + +grammar_path = pathlib.Path(__file__).parent / "hebrew.lark" + +with open(grammar_path) as grammar: + # NOTE: LALR parser is faster but can't be used to ambiguity between years and dates + hebrew_parser = Lark(grammar.read(), start="hebrew_date", strict=True) diff --git a/src/undate/converters/calendars/hebrew/transformer.py b/src/undate/converters/calendars/hebrew/transformer.py new file mode 100644 index 0000000..a6d2888 --- /dev/null +++ b/src/undate/converters/calendars/hebrew/transformer.py @@ -0,0 +1,40 @@ +from lark import Transformer, Tree + +from undate.undate import Undate, Calendar + + +class HebrewUndate(Undate): + """Undate convience subclass; sets default calendar to Hebrew.""" + + calendar = Calendar.HEBREW + + +class HebrewDateTransformer(Transformer): + 
"""Transform a Hebrew date parse tree and return an Undate or + UndateInterval.""" + + def hebrew_date(self, items): + parts = {} + for child in items: + if child.data in ["year", "month", "day"]: + # in each case we expect one integer value; + # anonymous tokens convert to their value and cast as int + value = int(child.children[0]) + parts[str(child.data)] = value + + # initialize and return an undate with islamic year, month, day and + # islamic calendar + return HebrewUndate(**parts) + + # year translation is not needed since we want a tree with name year + # this is equivalent to a no-op + # def year(self, items): + # return Tree(data="year", children=[items[0]]) + + def month(self, items): + # month has a nested tree for the rule and the value + # the name of the rule (month_1, month_2, etc) gives us the + # number of the month needed for converting the date + tree = items[0] + month_n = tree.data.split("_")[-1] + return Tree(data="month", children=[month_n]) diff --git a/src/undate/converters/calendars/hijri/__init__.py b/src/undate/converters/calendars/hijri/__init__.py index 4ac5b4b..8c28d52 100644 --- a/src/undate/converters/calendars/hijri/__init__.py +++ b/src/undate/converters/calendars/hijri/__init__.py @@ -1,3 +1,4 @@ from undate.converters.calendars.hijri.converter import HijriDateConverter +from undate.converters.calendars.hebrew.converter import HebrewDateConverter -__all__ = ["HijriDateConverter"] +__all__ = ["HijriDateConverter", "HebrewDateConverter"] diff --git a/src/undate/converters/calendars/hijri/converter.py b/src/undate/converters/calendars/hijri/converter.py index 9a8ad72..910c67e 100644 --- a/src/undate/converters/calendars/hijri/converter.py +++ b/src/undate/converters/calendars/hijri/converter.py @@ -24,10 +24,6 @@ class HijriDateConverter(BaseCalendarConverter): def __init__(self): self.transformer = HijriDateTransformer() - def max_month(self, year: int) -> int: - """maximum numeric month for the specified year in this calendar""" 
- return 12 - def max_day(self, year: int, month: int) -> int: """maximum numeric day for the specified year and month in this calendar""" return islamic.month_length(year, month) @@ -41,8 +37,8 @@ def to_gregorian(self, year: int, month: int, day: int) -> tuple[int, int, int]: def parse(self, value: str) -> Union[Undate, UndateInterval]: """ Parse a Hijri date string and return an :class:`~undate.undate.Undate` or - :class:`~undate.undate.UndateInterval` in Gregorian calendar. - The Hijri date string is preserved in the undate label + :class:`~undate.undate.UndateInterval`. + The Hijri date string is preserved in the undate label. """ if not value: raise ValueError("Parsing empty string is not supported") diff --git a/src/undate/undate.py b/src/undate/undate.py index 8a10073..0c635c0 100644 --- a/src/undate/undate.py +++ b/src/undate/undate.py @@ -22,6 +22,7 @@ class Calendar(StrEnum): GREGORIAN = auto() HIJRI = auto() + HEBREW = auto() @staticmethod def get_converter(calendar): @@ -123,10 +124,11 @@ def calculate_earliest_latest(self, year, month, day): if month == "XX": month = None - min_month = 1 # is min month ever anything other than 1 ? - # get max month from the calendar, since it depends on the - # calendar and potentially the year (e.g. leap years in Hebrew Anno Mundi) - max_month = self.calendar_converter.max_month(max_year) + # get first and last month from the calendar, since it is not + # always 1 and 12 + # TODO need to differentiate between min/max and first/last! 
+ min_month = self.calendar_converter.min_month() + max_month = self.calendar_converter.max_month() if month is not None: try: # treat as an integer if we can @@ -137,7 +139,9 @@ def calculate_earliest_latest(self, year, month, day): except ValueError: # if not, calculate min/max for missing digits min_month, max_month = self._missing_digit_minmax( - str(month), min_month, max_month + str(month), + 1, + 12, # min_month, max_month ) # similar to month above — unknown day, but day-level granularity if day == "XX": diff --git a/tests/test_converters/test_calendars/test_hebrew/test_hebrew_converter.py b/tests/test_converters/test_calendars/test_hebrew/test_hebrew_converter.py new file mode 100644 index 0000000..f335975 --- /dev/null +++ b/tests/test_converters/test_calendars/test_hebrew/test_hebrew_converter.py @@ -0,0 +1,142 @@ +import pytest + +from undate.converters.calendars.hebrew.converter import HebrewDateConverter +from undate.converters.calendars.hebrew.transformer import HebrewUndate +from undate.undate import Calendar, Undate +from undate.date import DatePrecision, Date + + +class TestHebrewDateConverter: + def test_parse(self): + # day + # 26 Tammuz 4816: Tammuz = month 4 (17 July, 1056 Gregorian) + date_str = "26 Tammuz 4816" + date = HebrewDateConverter().parse(date_str) + assert date == HebrewUndate(4816, 4, 26) + assert date.calendar == Calendar.HEBREW + assert date.precision == DatePrecision.DAY + assert date.label == f"{date_str} {HebrewDateConverter.calendar_name}" + + # month + date_str = "Ṭevet 5362" + date = HebrewDateConverter().parse(date_str) + assert date == HebrewUndate(5362, 10) # Teveth = month 10 + assert date.calendar == Calendar.HEBREW + assert date.precision == DatePrecision.MONTH + assert date.label == f"{date_str} {HebrewDateConverter.calendar_name}" + + # year + date_str = "4932" + date = HebrewDateConverter().parse(date_str) + assert date == HebrewUndate(4932) + assert date.calendar == Calendar.HEBREW + assert date.precision == 
DatePrecision.YEAR + assert date.label == f"{date_str} {HebrewDateConverter.calendar_name}" + + def test_gregorian_earliest_latest(self): + # earliest/latest should be converted to Gregorian for comparison + + # full date + + # 26 Tammuz 4816: 17 July, 1056; Tammuz = month 4 + date = HebrewUndate(4816, 4, 26) + assert date.earliest == Date(1056, 7, 17) + assert date.latest == Date(1056, 7, 17) + # 13 Tishrei 5416 Anno Mundi (1655-10-14) + date = HebrewUndate(5416, 7, 13) # Tishrei = month 7 + assert date.earliest == Date(1655, 10, 14) + assert date.latest == Date(1655, 10, 14) + + # month + + # Ṭevet 5362 Anno Mundi (25 December, 1601 – 22 January, 1602) + date = HebrewUndate(5362, 10) + assert date.earliest == Date(1601, 12, 25) + assert date.latest == Date(1602, 1, 22) + + # year + # 5416 : October 1655 to September 1656 + date = HebrewUndate(5416) + assert date.earliest == Date(1655, 10, 2) + assert date.latest == Date(1656, 9, 18) + + def test_parse_error(self): + # a string we can't parse should raise an error + with pytest.raises(ValueError): + HebrewDateConverter().parse("January 2, 1991") + # empty string should also error + with pytest.raises(ValueError): + HebrewDateConverter().parse("") + + def test_partially_known(self): + # hebrew dates get existing partially unknown behavior + + converter = HebrewDateConverter() + + # hebrew first/last month are not the same as min/max + unknown_month = HebrewUndate(1243, "XX") + assert unknown_month.precision == DatePrecision.MONTH + assert unknown_month.earliest == Date( + *converter.to_gregorian(1243, converter.min_month(), 1) + ) + max_month = converter.max_month() + assert unknown_month.latest == Date( + *converter.to_gregorian(1243, max_month, converter.max_day(1243, max_month)) + ) + + partially_unknown_month = HebrewUndate(1243, "1X") + assert partially_unknown_month.precision == DatePrecision.MONTH + assert partially_unknown_month.earliest == Date( + *converter.to_gregorian(1243, 10, 1) + ) + assert 
partially_unknown_month.latest == Date( + *converter.to_gregorian(1243, 12, 30) + ) + + # second month has 29 days + unknown_day = HebrewUndate(1243, 2, "XX") + assert unknown_day.precision == DatePrecision.DAY + assert unknown_day.earliest == Date(*converter.to_gregorian(1243, 2, 1)) + assert unknown_day.latest == Date(*converter.to_gregorian(1243, 2, 29)) + + partially_unknown_day = HebrewUndate(1243, 2, "2X") + assert partially_unknown_day.precision == DatePrecision.DAY + assert partially_unknown_day.earliest == Date( + *converter.to_gregorian(1243, 2, 20) + ) + assert partially_unknown_day.latest == Date( + *converter.to_gregorian(1243, 2, 29) + ) + + def test_compare_across_calendars(self): + # only day-precision dates can be exactly equal across calendars + + # 26 Tammuz 4816: Tammuz = month 4 (17 July, 1056 Gregorian) + assert HebrewUndate(4816, 4, 26) == Undate(1056, 7, 17) + # 13 Tishrei 5416; Tieshrei = month 7 (1655-10-14) + assert HebrewUndate(5416, 7, 13) == Undate(1655, 10, 14) + + # greater than / less than + assert HebrewUndate(4816) < Undate(1060) + assert HebrewUndate(5416) < Undate(1660) + assert HebrewUndate(5416, 7) > Undate(1655, 1) + assert HebrewUndate(4816, 4, 26) > Undate(1055, 5) + + # 26 Tammuz 4816: Tammuz = month 4 (17 July, 1056) + # so it falls within or is c ontained by July 1056 + assert HebrewUndate(4816, 4, 26) in Undate(1056, 7) + assert HebrewUndate(4816, 4, 26) not in Undate(1054) + + # sorting + sorted_dates = sorted( + [ + HebrewUndate(4816, 4, 26), # 1056-07-17 + HebrewUndate(5416), # 1655 + HebrewUndate(500), # -3261 + Undate(1995), + Undate(33), + Undate(1350), + ] + ) + expected_gregorian_years = [-3261, 33, 1056, 1350, 1655, 1995] + assert [d.earliest.year for d in sorted_dates] == expected_gregorian_years diff --git a/tests/test_converters/test_calendars/test_hebrew/test_hebrew_parser.py b/tests/test_converters/test_calendars/test_hebrew/test_hebrew_parser.py new file mode 100644 index 0000000..e4894b1 --- /dev/null 
+++ b/tests/test_converters/test_calendars/test_hebrew/test_hebrew_parser.py @@ -0,0 +1,63 @@ +import pytest +from undate.converters.calendars.hebrew.parser import hebrew_parser + + +# for now, just test that valid dates can be parsed + +testcases = [ + # year + "5362", + # month + year + # - with and without accent + "Ṭevet 5362", + "Tevet 5362", + "Elul 4932", + "Sivan 5581", + # variant month name, with or without accent + "Ṭeveth 5362", + "Teveth 5362", + "Iyyar 1526", + "Iyar 1526", + # day month year + "26 Tammuz 4816", + "7 Heshvan 5425", + "26 Tishrei 5416", + "26 Tishri 5416", + "14 Adar 5403", + "14 Adar I 5403", + "9 Adar II 5404", + "9 Adar Bet 5404", + # two and 1 digit years + "536", + "53", + "3", +] + + +@pytest.mark.parametrize("date_string", testcases) +def test_should_parse(date_string): + assert hebrew_parser.parse(date_string) + + +error_cases = [ + # invalid days + "0 Tammuz 5403", + "31 Tishri 5403", + # month alone + "Tishri", + # month day only + "12 Heshvan", + # invalid month + "Foo 383", + # wrong format + "2024-10-02", + # year month day not supported + "5403 Adar", + "5403 Adar 14", +] + + +@pytest.mark.parametrize("date_string", error_cases) +def test_should_error(date_string): + with pytest.raises(Exception): + hebrew_parser.parse(date_string) diff --git a/tests/test_converters/test_calendars/test_hebrew/test_hebrew_transformer.py b/tests/test_converters/test_calendars/test_hebrew/test_hebrew_transformer.py new file mode 100644 index 0000000..6e4a5e6 --- /dev/null +++ b/tests/test_converters/test_calendars/test_hebrew/test_hebrew_transformer.py @@ -0,0 +1,43 @@ +import pytest +from undate.converters.calendars.hebrew.parser import hebrew_parser +from undate.converters.calendars.hebrew.transformer import ( + HebrewDateTransformer, + HebrewUndate, +) +from undate.undate import Undate, Calendar +from undate.date import DatePrecision + + +def test_hebrew_undate(): + assert HebrewUndate(848).calendar == Calendar.HEBREW + + +testcases = [ + 
# examples from Princeton Geniza Project + # date conversions checked with https://www.muqawwim.com/ + # 26 Tammuz 4816; Tammuz = month 4 + ("26 Tammuz 4816", HebrewUndate(4816, 4, 26), DatePrecision.DAY), + ("Tammuz 4816", HebrewUndate(4816, 4), DatePrecision.MONTH), + ("4816", HebrewUndate(4816), DatePrecision.YEAR), + # 26 Tishrei 5416: Tishrei = month 7 + ("26 Tishrei 5416", HebrewUndate(5416, 7, 26), DatePrecision.DAY), + # Ṭeveth = month 10 + ("Ṭevet 5362", HebrewUndate(5362, 10), DatePrecision.MONTH), + ("5362", HebrewUndate(5362), DatePrecision.YEAR), + # add when we support parsing ranges: + # Adar I and Adar II 5453 : (1693 CE) +] + + +@pytest.mark.parametrize("date_string,expected,expected_precision", testcases) +def test_transform(date_string, expected, expected_precision): + transformer = HebrewDateTransformer(visit_tokens=True) + # parse the input string, then transform to undate object + parsetree = hebrew_parser.parse(date_string) + transformed_date = transformer.transform(parsetree) + assert transformed_date == expected + # currently only undates have date precision + if isinstance(transformed_date, Undate): + assert transformed_date.precision == expected_precision + # transformer doesn't have access to date string, + # label will need to be set by the converter class From 5660fa2fd8c0f43e8779d46229f3b1d6fb713ef1 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Tue, 26 Nov 2024 19:29:03 -0500 Subject: [PATCH 31/77] Fix mis-formatted docstring --- src/undate/converters/base.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/undate/converters/base.py b/src/undate/converters/base.py index ecdbf9b..630c9f5 100644 --- a/src/undate/converters/base.py +++ b/src/undate/converters/base.py @@ -25,9 +25,9 @@ - For converters with sufficient complexity, you may want to create a submodule; see ``undate.converters.calendars.hijri`` for an example. 
- Extend ``BaseCalendarConverter`` and implement ``parse`` and ``to_string`` - formatter methods as desired/appropriate for your converter as well as the - additional methods for ``max_month``, ``max_day``, and convertion ``to_gregorian`` - calendar. + formatter methods as desired/appropriate for your converter as well as the + additional methods for ``max_month``, ``max_day``, and convertion ``to_gregorian`` + calendar. - Add unit tests for the new calendar logic under ``tests/test_converters/calendars/`` - Add the new calendar to the ``Calendar`` enum of supported calendars in ``undate/undate.py`` and confirm that the `get_converter` method loads your From c6ed8179e98ff3c58e61cbfd6fee649f7456b4b2 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Tue, 26 Nov 2024 19:29:03 -0500 Subject: [PATCH 32/77] Fix mis-formatted docstring --- src/undate/converters/base.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/undate/converters/base.py b/src/undate/converters/base.py index 14bff87..150fc5f 100644 --- a/src/undate/converters/base.py +++ b/src/undate/converters/base.py @@ -25,9 +25,9 @@ - For converters with sufficient complexity, you may want to create a submodule; see ``undate.converters.calendars.hijri`` for an example. - Extend ``BaseCalendarConverter`` and implement ``parse`` and ``to_string`` - formatter methods as desired/appropriate for your converter as well as the - additional methods for ``max_month``, ``max_day``, and convertion ``to_gregorian`` - calendar. + formatter methods as desired/appropriate for your converter as well as the + additional methods for ``max_month``, ``max_day``, and convertion ``to_gregorian`` + calendar. 
- Import your calendar in ``undate/converters/calendars/__init__.py`` and include in `__all__`` - Add unit tests for the new calendar logic under ``tests/test_converters/calendars/`` - Add the new calendar to the ``Calendar`` enum of supported calendars in From 88e4d1741d16a054775fa061e5c2d593f9be070c Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Tue, 26 Nov 2024 21:43:59 -0500 Subject: [PATCH 33/77] Adjust imports for hebrew calendar converter --- src/undate/converters/calendars/__init__.py | 3 ++- src/undate/converters/calendars/hebrew/__init__.py | 4 ++-- .../test_calendars/test_hebrew/test_hebrew_converter.py | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/undate/converters/calendars/__init__.py b/src/undate/converters/calendars/__init__.py index 635af21..c14e115 100644 --- a/src/undate/converters/calendars/__init__.py +++ b/src/undate/converters/calendars/__init__.py @@ -1,4 +1,5 @@ from undate.converters.calendars.gregorian import GregorianDateConverter from undate.converters.calendars.hijri import HijriDateConverter +from undate.converters.calendars.hebrew import HebrewDateConverter -__all__ = ["HijriDateConverter", "GregorianDateConverter"] +__all__ = ["HijriDateConverter", "GregorianDateConverter", "HebrewDateConverter"] diff --git a/src/undate/converters/calendars/hebrew/__init__.py b/src/undate/converters/calendars/hebrew/__init__.py index 4ac5b4b..e612ce3 100644 --- a/src/undate/converters/calendars/hebrew/__init__.py +++ b/src/undate/converters/calendars/hebrew/__init__.py @@ -1,3 +1,3 @@ -from undate.converters.calendars.hijri.converter import HijriDateConverter +from undate.converters.calendars.hebrew.converter import HebrewDateConverter -__all__ = ["HijriDateConverter"] +__all__ = ["HebrewDateConverter"] diff --git a/tests/test_converters/test_calendars/test_hebrew/test_hebrew_converter.py b/tests/test_converters/test_calendars/test_hebrew/test_hebrew_converter.py index f335975..1c05632 100644 --- 
a/tests/test_converters/test_calendars/test_hebrew/test_hebrew_converter.py +++ b/tests/test_converters/test_calendars/test_hebrew/test_hebrew_converter.py @@ -1,6 +1,6 @@ import pytest -from undate.converters.calendars.hebrew.converter import HebrewDateConverter +from undate.converters.calendars import HebrewDateConverter from undate.converters.calendars.hebrew.transformer import HebrewUndate from undate.undate import Calendar, Undate from undate.date import DatePrecision, Date From f908cd5465916c9ed6a1f1fd338ad94b4b1c35f3 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 6 Dec 2024 10:51:55 +0100 Subject: [PATCH 34/77] Add comment about earliest Hebrew year in grammar --- src/undate/converters/calendars/hebrew/hebrew.lark | 1 + 1 file changed, 1 insertion(+) diff --git a/src/undate/converters/calendars/hebrew/hebrew.lark b/src/undate/converters/calendars/hebrew/hebrew.lark index 64e527b..b55ec3f 100644 --- a/src/undate/converters/calendars/hebrew/hebrew.lark +++ b/src/undate/converters/calendars/hebrew/hebrew.lark @@ -11,6 +11,7 @@ hebrew_date: day month year | month year | year // PGP dates use qualifiers like "first decade of" (for beginning of month) // "first third of", seasons (can look for more examples) +// Hebrew calendar starts with year 1 in 3761 BCE year: /\d+/ // months From c24cd34a6bfb6603c8639c299b8077b288678cb5 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 6 Dec 2024 10:52:40 +0100 Subject: [PATCH 35/77] Test exceptions and parser type errors more specific --- .../test_hebrew/test_hebrew_converter.py | 7 ++++++ .../test_hebrew/test_hebrew_parser.py | 24 ++++++++++--------- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/tests/test_converters/test_calendars/test_hebrew/test_hebrew_converter.py b/tests/test_converters/test_calendars/test_hebrew/test_hebrew_converter.py index 1c05632..319b551 100644 --- a/tests/test_converters/test_calendars/test_hebrew/test_hebrew_converter.py +++ 
b/tests/test_converters/test_calendars/test_hebrew/test_hebrew_converter.py @@ -68,6 +68,13 @@ def test_parse_error(self): with pytest.raises(ValueError): HebrewDateConverter().parse("") + # non-string input should raise a type error + with pytest.raises(TypeError): + HebrewDateConverter().parse(42) + + with pytest.raises(TypeError): + HebrewDateConverter().parse({"foo": "bar"}) + def test_partially_known(self): # hebrew dates get existing partially unknown behavior diff --git a/tests/test_converters/test_calendars/test_hebrew/test_hebrew_parser.py b/tests/test_converters/test_calendars/test_hebrew/test_hebrew_parser.py index e4894b1..69b929e 100644 --- a/tests/test_converters/test_calendars/test_hebrew/test_hebrew_parser.py +++ b/tests/test_converters/test_calendars/test_hebrew/test_hebrew_parser.py @@ -1,4 +1,6 @@ import pytest +from lark.exceptions import UnexpectedCharacters, UnexpectedEOF + from undate.converters.calendars.hebrew.parser import hebrew_parser @@ -41,23 +43,23 @@ def test_should_parse(date_string): error_cases = [ # invalid days - "0 Tammuz 5403", - "31 Tishri 5403", + ("0 Tammuz 5403", UnexpectedCharacters), + ("31 Tishri 5403", UnexpectedCharacters), # month alone - "Tishri", + ("Tishri", UnexpectedEOF), # month day only - "12 Heshvan", + ("12 Heshvan", UnexpectedEOF), # invalid month - "Foo 383", + ("Foo 383", UnexpectedCharacters), # wrong format - "2024-10-02", + ("2024-10-02", UnexpectedCharacters), # year month day not supported - "5403 Adar", - "5403 Adar 14", + ("5403 Adar", UnexpectedCharacters), + ("5403 Adar 14", UnexpectedCharacters), ] -@pytest.mark.parametrize("date_string", error_cases) -def test_should_error(date_string): - with pytest.raises(Exception): +@pytest.mark.parametrize("date_string,exception", error_cases) +def test_should_error(date_string, exception): + with pytest.raises(exception): hebrew_parser.parse(date_string) From 5773bf7d0848481adcc522b8f6ed2e2938fc606c Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 6 
Dec 2024 10:53:45 +0100 Subject: [PATCH 36/77] Run unit tests on pull request to any branch --- .github/workflows/unit_tests.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index 89df8cb..17a1c7a 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -8,6 +8,8 @@ on: - 'undate/**' - 'tests/**' pull_request: + branches: + - "**" env: # python version used to calculate and submit code coverage From 3032785dbf16de32ffcfc6ff51a54eb33bf7c406 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 6 Dec 2024 10:57:26 +0100 Subject: [PATCH 37/77] Fix incorrect import --- src/undate/converters/calendars/hijri/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/undate/converters/calendars/hijri/__init__.py b/src/undate/converters/calendars/hijri/__init__.py index 8c28d52..4ac5b4b 100644 --- a/src/undate/converters/calendars/hijri/__init__.py +++ b/src/undate/converters/calendars/hijri/__init__.py @@ -1,4 +1,3 @@ from undate.converters.calendars.hijri.converter import HijriDateConverter -from undate.converters.calendars.hebrew.converter import HebrewDateConverter -__all__ = ["HijriDateConverter", "HebrewDateConverter"] +__all__ = ["HijriDateConverter"] From 91376088979e277e55f82f5e554dc3b7b86c13c3 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 6 Dec 2024 10:57:43 +0100 Subject: [PATCH 38/77] Force calendar converters to implement min/max month methods --- src/undate/converters/base.py | 8 ++++---- src/undate/converters/calendars/gregorian.py | 8 ++++++++ src/undate/converters/calendars/hijri/converter.py | 8 ++++++++ 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/src/undate/converters/base.py b/src/undate/converters/base.py index 150fc5f..bcd90c2 100644 --- a/src/undate/converters/base.py +++ b/src/undate/converters/base.py @@ -138,12 +138,12 @@ class BaseCalendarConverter(BaseDateConverter): name: str = "Base Calendar 
Converter" def min_month(self) -> int: - """First month for this calendar. Defaults to 1.""" - return 1 + """First month for this calendar.""" + raise NotImplementedError def max_month(self) -> int: - """Last month for this calendar. Defaults to 12.""" - return 12 + """Last month for this calendar.""" + raise NotImplementedError def max_day(self, year: int, month: int) -> int: """maximum numeric day for the specified year and month in this calendar""" diff --git a/src/undate/converters/calendars/gregorian.py b/src/undate/converters/calendars/gregorian.py index 9a3e2a9..af8ea25 100644 --- a/src/undate/converters/calendars/gregorian.py +++ b/src/undate/converters/calendars/gregorian.py @@ -15,6 +15,14 @@ class GregorianDateConverter(BaseCalendarConverter): #: known non-leap year NON_LEAP_YEAR: int = 2022 + def min_month(self) -> int: + """First month for the Gregorian calendar.""" + return 1 + + def max_month(self) -> int: + """maximum numeric month for the specified year in the Gregorian calendar""" + return 12 + def max_day(self, year: int, month: int) -> int: """maximum numeric day for the specified year and month in this calendar""" # if month is known, use that to calculate diff --git a/src/undate/converters/calendars/hijri/converter.py b/src/undate/converters/calendars/hijri/converter.py index 910c67e..1cb7c82 100644 --- a/src/undate/converters/calendars/hijri/converter.py +++ b/src/undate/converters/calendars/hijri/converter.py @@ -28,6 +28,14 @@ def max_day(self, year: int, month: int) -> int: """maximum numeric day for the specified year and month in this calendar""" return islamic.month_length(year, month) + def min_month(self) -> int: + """First month for this calendar.""" + return 1 + + def max_month(self) -> int: + """maximum numeric month for the specified year in this calendar""" + return 12 + def to_gregorian(self, year: int, month: int, day: int) -> tuple[int, int, int]: """Convert a Hijri date, specified by year, month, and day, to the Gregorian 
equivalent date. Returns a tuple of year, month, day. From 920f7361fa21818ff57a217b5f9edbb144ca5f62 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 6 Dec 2024 11:34:03 +0100 Subject: [PATCH 39/77] Differentiate min/max month from first/last month --- src/undate/converters/base.py | 14 ++++++++--- src/undate/converters/calendars/gregorian.py | 2 +- .../converters/calendars/hebrew/converter.py | 19 ++++++++++----- .../converters/calendars/hijri/converter.py | 6 ++--- src/undate/undate.py | 24 +++++++++---------- .../test_hebrew/test_hebrew_converter.py | 14 +++++++---- 6 files changed, 50 insertions(+), 29 deletions(-) diff --git a/src/undate/converters/base.py b/src/undate/converters/base.py index bcd90c2..5fefe49 100644 --- a/src/undate/converters/base.py +++ b/src/undate/converters/base.py @@ -138,13 +138,21 @@ class BaseCalendarConverter(BaseDateConverter): name: str = "Base Calendar Converter" def min_month(self) -> int: - """First month for this calendar.""" + """Smallest numeric month for this calendar.""" raise NotImplementedError - def max_month(self) -> int: - """Last month for this calendar.""" + def max_month(self, year: int) -> int: + """Maximum numeric month for this calendar""" raise NotImplementedError + def first_month(self) -> int: + """first month in this calendar; by default, returns :meth:`min_month`.""" + return self.min_month() + + def last_month(self, year: int) -> int: + """last month in this calendar; by default, returns :meth:`max_month`.""" + return self.max_month(year) + def max_day(self, year: int, month: int) -> int: """maximum numeric day for the specified year and month in this calendar""" raise NotImplementedError diff --git a/src/undate/converters/calendars/gregorian.py b/src/undate/converters/calendars/gregorian.py index af8ea25..59cde48 100644 --- a/src/undate/converters/calendars/gregorian.py +++ b/src/undate/converters/calendars/gregorian.py @@ -19,7 +19,7 @@ def min_month(self) -> int: """First month for the Gregorian 
calendar.""" return 1 - def max_month(self) -> int: + def max_month(self, year: int) -> int: """maximum numeric month for the specified year in the Gregorian calendar""" return 12 diff --git a/src/undate/converters/calendars/hebrew/converter.py b/src/undate/converters/calendars/hebrew/converter.py index 7d83dc7..b8b4620 100644 --- a/src/undate/converters/calendars/hebrew/converter.py +++ b/src/undate/converters/calendars/hebrew/converter.py @@ -25,14 +25,21 @@ def __init__(self): self.transformer = HebrewDateTransformer() def min_month(self) -> int: - """first numeric month for the specified year in this calendar""" - # hebrew calendar civil year starts in Tishri + """Smallest numeric month for this calendar.""" + return 1 + + def max_month(self, year: int) -> int: + """Maximum numeric month for this calendar. In Hebrew calendar, this is 12 or 13 + depending on whether it is a leap year.""" + return hebrew.year_months(year) + + def first_month(self) -> int: + """First month in this calendar. The Hebrew civil year starts in Tishri.""" return hebrew.TISHRI - def max_month(self) -> int: - """last numeric month for the specified year in this calendar""" - # hebrew calendar civil year starts in Tishri - # Elul is the month before Tishri + def last_month(self, year: int) -> int: + """Last month in this calendar. 
Hebrew civil year starts in Tishri, + Elul is the month before Tishri.""" return hebrew.ELUL def max_day(self, year: int, month: int) -> int: diff --git a/src/undate/converters/calendars/hijri/converter.py b/src/undate/converters/calendars/hijri/converter.py index 1cb7c82..b4b81b1 100644 --- a/src/undate/converters/calendars/hijri/converter.py +++ b/src/undate/converters/calendars/hijri/converter.py @@ -29,11 +29,11 @@ def max_day(self, year: int, month: int) -> int: return islamic.month_length(year, month) def min_month(self) -> int: - """First month for this calendar.""" + """smallest numeric month for this calendar.""" return 1 - def max_month(self) -> int: - """maximum numeric month for the specified year in this calendar""" + def max_month(self, year: int) -> int: + """maximum numeric month for this calendar""" return 12 def to_gregorian(self, year: int, month: int, day: int) -> tuple[int, int, int]: diff --git a/src/undate/undate.py b/src/undate/undate.py index 0c635c0..fab277c 100644 --- a/src/undate/undate.py +++ b/src/undate/undate.py @@ -124,24 +124,24 @@ def calculate_earliest_latest(self, year, month, day): if month == "XX": month = None - # get first and last month from the calendar, since it is not - # always 1 and 12 - # TODO need to differentiate between min/max and first/last! 
+ # get first and last month from the calendar (not always 1 and 12) + # as well as min/max months + earliest_month = self.calendar_converter.first_month() + latest_month = self.calendar_converter.last_month(max_year) + min_month = self.calendar_converter.min_month() - max_month = self.calendar_converter.max_month() + max_month = self.calendar_converter.max_month(max_year) if month is not None: try: # treat as an integer if we can month = int(month) # update initial value self.initial_values["month"] = month - min_month = max_month = month + earliest_month = latest_month = month except ValueError: # if not, calculate min/max for missing digits - min_month, max_month = self._missing_digit_minmax( - str(month), - 1, - 12, # min_month, max_month + earliest_month, latest_month = self._missing_digit_minmax( + str(month), min_month, max_month ) # similar to month above — unknown day, but day-level granularity if day == "XX": @@ -159,7 +159,7 @@ def calculate_earliest_latest(self, year, month, day): rel_year = year if year and isinstance(year, int) else None # use month if it is an integer; otherwise use previusly determined # max month (which may not be 12 depending if partially unknown) - rel_month = month if month and isinstance(month, int) else max_month + rel_month = month if month and isinstance(month, int) else latest_month max_day = self.calendar_converter.max_day(rel_year, rel_month) @@ -175,10 +175,10 @@ def calculate_earliest_latest(self, year, month, day): # convert to Gregorian calendar so earliest/latest can always # be used for comparison self.earliest = Date( - *self.calendar_converter.to_gregorian(min_year, min_month, min_day) + *self.calendar_converter.to_gregorian(min_year, earliest_month, min_day) ) self.latest = Date( - *self.calendar_converter.to_gregorian(max_year, max_month, max_day) + *self.calendar_converter.to_gregorian(max_year, latest_month, max_day) ) def set_calendar(self, calendar: Union[str, Calendar]): diff --git 
a/tests/test_converters/test_calendars/test_hebrew/test_hebrew_converter.py b/tests/test_converters/test_calendars/test_hebrew/test_hebrew_converter.py index 319b551..c3c8b7c 100644 --- a/tests/test_converters/test_calendars/test_hebrew/test_hebrew_converter.py +++ b/tests/test_converters/test_calendars/test_hebrew/test_hebrew_converter.py @@ -84,11 +84,13 @@ def test_partially_known(self): unknown_month = HebrewUndate(1243, "XX") assert unknown_month.precision == DatePrecision.MONTH assert unknown_month.earliest == Date( - *converter.to_gregorian(1243, converter.min_month(), 1) + *converter.to_gregorian(1243, converter.first_month(), 1) ) - max_month = converter.max_month() + last_month = converter.last_month(year=1243) assert unknown_month.latest == Date( - *converter.to_gregorian(1243, max_month, converter.max_day(1243, max_month)) + *converter.to_gregorian( + 1243, last_month, converter.max_day(1243, last_month) + ) ) partially_unknown_month = HebrewUndate(1243, "1X") @@ -96,8 +98,12 @@ def test_partially_known(self): assert partially_unknown_month.earliest == Date( *converter.to_gregorian(1243, 10, 1) ) + # for unknown digit, assume largest possible value instead + # of last semantic monthin the year + last_month = converter.max_month(year=1243) + last_day = converter.max_day(1243, last_month) assert partially_unknown_month.latest == Date( - *converter.to_gregorian(1243, 12, 30) + *converter.to_gregorian(1243, last_month, last_day) ) # second month has 29 days From b7ae594016d2e73390745cef6e394f5d89837053 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Sat, 7 Dec 2024 12:08:33 +0100 Subject: [PATCH 40/77] Rewrite gregorian calendar docstring that incorrectly ref'ed Hijri --- src/undate/converters/calendars/gregorian.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/undate/converters/calendars/gregorian.py b/src/undate/converters/calendars/gregorian.py index 59cde48..63a3dd9 100644 --- a/src/undate/converters/calendars/gregorian.py 
+++ b/src/undate/converters/calendars/gregorian.py @@ -42,8 +42,9 @@ def max_day(self, year: int, month: int) -> int: return max_day def to_gregorian(self, year, month, day) -> tuple[int, int, int]: - """Convert a Hijri date, specified by year, month, and day, - to the Gregorian equivalent date. Returns a tuple of year, month, day. + """Convert to Gregorian date. This returns the specified year, month, + and day unchanged, but is provided for consistency since all calendar + converters need to support conversion to Gregorian calendar for + a common point of comparison. """ - return (year, month, day) From 759d0c7b4b471d004abd08d0f3ed76674bbfc2db Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Sat, 7 Dec 2024 12:21:19 +0100 Subject: [PATCH 41/77] Fix docstring typo caught by @coderabbitai --- src/undate/converters/calendars/gregorian.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/undate/converters/calendars/gregorian.py b/src/undate/converters/calendars/gregorian.py index 63a3dd9..5a1d2dc 100644 --- a/src/undate/converters/calendars/gregorian.py +++ b/src/undate/converters/calendars/gregorian.py @@ -5,11 +5,12 @@ class GregorianDateConverter(BaseCalendarConverter): """ - Calendar onverter class for Gregorian calendar. + Calendar converter class for Gregorian calendar. 
""" #: converter name: Gregorian name: str = "Gregorian" + #: calendar calendar_name: str = "Gregorian" #: known non-leap year From d9fd4ba39cf6dc7f6c6032121f8f606d122c18a9 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 20 Dec 2024 15:30:14 -0500 Subject: [PATCH 42/77] Include calendar converters in sphinx docs and add basic usage to readme --- README.md | 29 ++++++++++++++++++++++++++++- docs/undate/converters.rst | 35 +++++++++++++++++++++++++++++------ 2 files changed, 57 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 37b8452..9c8e898 100644 --- a/README.md +++ b/README.md @@ -140,7 +140,7 @@ An `UndateInterval` is a date range between two `Undate` objects. Intervals can ``` You can initialize `Undate` or `UndateInterval` objects by parsing a date string with a specific converter, and you can also output an `Undate` object in those formats. -Available converters are "ISO8601" and "EDTF" (but only) +Currently available converters are "ISO8601" and "EDTF" and supported calendars. ```python >>> from undate import Undate @@ -156,6 +156,33 @@ Available converters are "ISO8601" and "EDTF" (but only) ``` +### Calendars + +All `Undate` objects are calendar aware, and date converters include support for parsing and working with dates from other calendars. The Gregorian calendar is used by default; currently `undate` supports the Hijri Islamic calendar and the Anno Mundi Hebrew calendar based on calendar conversion logic implemented in the [convertdate](https://convertdate.readthedocs.io/en/latest/) package. + +Dates are stored with the year, month, day and appropriate precision for the original calendar; internally, earliest and latest dates are calculated in Gregorian / Proleptic Gregorian calendar for standardized comparison across dates from different calendars. 
+ +```python +>>> from undate import Undate +>>> tammuz4816 = Undate.parse("26 Tammuz 4816", "Hebrew") +>>> tammuz4816 + +>>> rajab495 = Undate.parse("Rajab 495", "Hijri") +>>> rajab495 + +>>> y2k = Undate.parse("2001", "EDTF") +>>> y2k + +>>> [str(d.earliest) for d in [rajab495, tammuz4816, y2k]] +['1102-04-28', '1056-07-17', '2001-01-01'] +>>> [str(d.precision) for d in [rajab495, tammuz4816, y2k]] +['MONTH', 'DAY', 'YEAR'] +>>> sorted([rajab495, tammuz4816, y2k]) +[, , ] +``` + +* * * + For more examples, refer to the [example notebooks](https://github.com/dh-tech/undate-python/tree/main/examples/notebooks/) included in this repository. ## Documentation diff --git a/docs/undate/converters.rst b/docs/undate/converters.rst index 701aaf1..57e90a1 100644 --- a/docs/undate/converters.rst +++ b/docs/undate/converters.rst @@ -1,19 +1,25 @@ Converters ========== +Overview +-------- + .. automodule:: undate.converters.base :members: :undoc-members: +Formats +-------- + ISO8601 -------- +^^^^^^^ .. automodule:: undate.converters.iso8601 :members: :undoc-members: Extended Date-Time Format (EDTF) --------------------------------- +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. automodule:: undate.converters.edtf.converter :members: @@ -23,8 +29,25 @@ Extended Date-Time Format (EDTF) :members: :undoc-members: -.. transformer is more of an internal, probably doesn't make sense to include -.. .. automodule:: undate.converters.edtf.transformer -.. :members: -.. :undoc-members: + +Calendars +--------- + +Gregorian +^^^^^^^^^ + +.. automodule:: undate.converters.calendars.gregorian + :members: + +Hijri (Islamic calendar) +^^^^^^^^^^^^^^^^^^^^^^^^ + +.. automodule:: undate.converters.calendars.hijri.converter + :members: + +Anno Mundi (Hebrew calendar) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. 
automodule:: undate.converters.calendars.hebrew.converter + :members: From 4372b237e78d0491eaf1328608817f62fd83aaeb Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 20 Dec 2024 15:40:58 -0500 Subject: [PATCH 43/77] Address coverage issues flagged by codecov --- src/undate/converters/edtf/transformer.py | 11 +++-------- tests/test_converters/test_base.py | 14 +++++++++++++- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/src/undate/converters/edtf/transformer.py b/src/undate/converters/edtf/transformer.py index 135c93b..d5bcfcb 100644 --- a/src/undate/converters/edtf/transformer.py +++ b/src/undate/converters/edtf/transformer.py @@ -54,24 +54,19 @@ def year_unspecified(self, items): return Tree(data="year", children=[value]) def month_unspecified(self, items): + # combine multiple parts into a single string value = "".join(self.get_values(items)) return Tree(data="month", children=[value]) def day_unspecified(self, items): + # combine multiple parts into a single string value = "".join(self.get_values(items)) return Tree(data="day", children=[value]) def date_level1(self, items): return self.date(items) - def year(self, items): - # when the year is negative, there are two tokens - if len(items) > 1 and items[0] == "-": - # an anonymous token for the - and the integer year - year = items[1] - return Tree(data="year", children=[-year]) - - return Tree(data="year", children=[items[0]]) + # year (including negative years) use default transformation def year_fivedigitsplus(self, items): # strip off the leading Y and convert to integer diff --git a/tests/test_converters/test_base.py b/tests/test_converters/test_base.py index 1426f13..c9578e4 100644 --- a/tests/test_converters/test_base.py +++ b/tests/test_converters/test_base.py @@ -1,7 +1,7 @@ import logging import pytest -from undate.converters.base import BaseDateConverter +from undate.converters.base import BaseDateConverter, BaseCalendarConverter class TestBaseDateConverter: @@ -62,3 +62,15 @@ class 
ISO8601DateFormat2(BaseDateConverter): assert len(BaseDateConverter.available_converters()) != len( BaseDateConverter.subclasses() ) + + +class TestBaseCalendarConverter: + def test_not_implemented(self): + with pytest.raises(NotImplementedError): + BaseCalendarConverter().min_month() + with pytest.raises(NotImplementedError): + BaseCalendarConverter().max_month(1900) + with pytest.raises(NotImplementedError): + BaseCalendarConverter().max_day(1900, 12) + with pytest.raises(NotImplementedError): + BaseCalendarConverter().to_gregorian(1900, 12, 31) From f6c43821cfc0230fc2256107852cfb42957eaf0c Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 6 Feb 2025 17:42:51 -0500 Subject: [PATCH 44/77] Find all converter subclasses / descendants --- src/undate/converters/base.py | 17 +++++++++++------ tests/test_converters/test_base.py | 17 +++++++++++++++++ 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/src/undate/converters/base.py b/src/undate/converters/base.py index 5fefe49..e57faca 100644 --- a/src/undate/converters/base.py +++ b/src/undate/converters/base.py @@ -114,7 +114,7 @@ def available_converters(cls) -> Dict[str, Type["BaseDateConverter"]]: return {c.name: c for c in cls.subclasses()} # type: ignore @classmethod - def subclasses(cls) -> list[Type["BaseDateConverter"]]: + def subclasses(cls) -> set[Type["BaseDateConverter"]]: """ List of available converters classes. Includes calendar convert subclasses. 
@@ -123,11 +123,16 @@ def subclasses(cls) -> list[Type["BaseDateConverter"]]: cls.import_converters() # find all direct subclasses, excluding base calendar converter - subclasses = cls.__subclasses__() - subclasses.remove(BaseCalendarConverter) - # add all subclasses of calendar converter base class - subclasses.extend(BaseCalendarConverter.__subclasses__()) - return subclasses + direct_subclasses = cls.__subclasses__() + all_subclasses = set(direct_subclasses) + # recurse to find nested subclasses + for subc in direct_subclasses: + # print(f"class subclasses: {subc.name} {subc.subclasses()}") + all_subclasses |= subc.subclasses() + + # omit the calendar converter base class, which is not itself a converter + all_subclasses -= {BaseCalendarConverter} + return all_subclasses class BaseCalendarConverter(BaseDateConverter): diff --git a/tests/test_converters/test_base.py b/tests/test_converters/test_base.py index c9578e4..6420ec7 100644 --- a/tests/test_converters/test_base.py +++ b/tests/test_converters/test_base.py @@ -2,6 +2,11 @@ import pytest from undate.converters.base import BaseDateConverter, BaseCalendarConverter +from undate.converters.calendars import ( + GregorianDateConverter, + HebrewDateConverter, + HijriDateConverter, +) class TestBaseDateConverter: @@ -29,6 +34,18 @@ def test_parse_to_string(self): with pytest.raises(NotImplementedError): BaseDateConverter().to_string(1991) + def test_subclasses(self): + # define a nested subclass + class SubSubConverter(HijriDateConverter): + pass + + subclasses = BaseDateConverter.subclasses() + assert BaseCalendarConverter not in subclasses + assert HijriDateConverter in subclasses + assert HebrewDateConverter in subclasses + assert GregorianDateConverter in subclasses + assert SubSubConverter in subclasses + def test_import_converters_import_only_once(caplog): # clear the cache, since any instantiation of an Undate From 0deb9adaf0286e7531abe0cedf140e0a7c08db27 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 7 
Feb 2025 10:23:13 -0500 Subject: [PATCH 45/77] Update docstring to describe the modified logic --- src/undate/converters/base.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/undate/converters/base.py b/src/undate/converters/base.py index e57faca..775c550 100644 --- a/src/undate/converters/base.py +++ b/src/undate/converters/base.py @@ -116,8 +116,9 @@ def available_converters(cls) -> Dict[str, Type["BaseDateConverter"]]: @classmethod def subclasses(cls) -> set[Type["BaseDateConverter"]]: """ - List of available converters classes. Includes calendar convert - subclasses. + Set of available converters classes. Includes descendant + subclasses, including calendar converters, but does not include + :class:`BaseCalendarConverter`. """ # ensure undate converters are imported cls.import_converters() From 89e806d438456182d132143440ac5478469b1e79 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 7 Feb 2025 10:24:39 -0500 Subject: [PATCH 46/77] Document that duration logic is inclusive #63 --- src/undate/undate.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/undate/undate.py b/src/undate/undate.py index fab277c..589d647 100644 --- a/src/undate/undate.py +++ b/src/undate/undate.py @@ -420,7 +420,9 @@ def duration(self) -> Timedelta: """What is the duration of this date? Calculate based on earliest and latest date within range, taking into account the precision of the date even if not all - parts of the date are known.""" + parts of the date are known. Note that durations are inclusive, + and include both the earliest and latest date rather than the + difference between them.""" # if precision is a single day, duration is one day # no matter when it is or what else is known @@ -541,6 +543,8 @@ def __eq__(self, other) -> bool: def duration(self) -> Timedelta: """Calculate the duration between two undates. 
+ Note that durations are inclusive, and include both the earliest and latest + date rather than the difference between them. :returns: A duration :rtype: Timedelta From 48f11893cb6017cecd0f1a669a7ecb2b2aede7c9 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 7 Feb 2025 10:28:04 -0500 Subject: [PATCH 47/77] Add closed interval language to describe duration logic --- src/undate/undate.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/undate/undate.py b/src/undate/undate.py index 589d647..c12d022 100644 --- a/src/undate/undate.py +++ b/src/undate/undate.py @@ -420,9 +420,9 @@ def duration(self) -> Timedelta: """What is the duration of this date? Calculate based on earliest and latest date within range, taking into account the precision of the date even if not all - parts of the date are known. Note that durations are inclusive, - and include both the earliest and latest date rather than the - difference between them.""" + parts of the date are known. Note that durations are inclusive + (i.e., a closed interval) and include both the earliest and latest + date rather than the difference between them.""" # if precision is a single day, duration is one day # no matter when it is or what else is known @@ -543,8 +543,9 @@ def __eq__(self, other) -> bool: def duration(self) -> Timedelta: """Calculate the duration between two undates. - Note that durations are inclusive, and include both the earliest and latest - date rather than the difference between them. + Note that durations are inclusive (i.e., a closed interval), and + include both the earliest and latest date rather than the difference + between them. 
:returns: A duration :rtype: Timedelta From 6c8c63927a39c61ba25c9427c88e29a125828dd0 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 7 Feb 2025 10:39:43 -0500 Subject: [PATCH 48/77] Clean up commented out print statement --- src/undate/converters/base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/undate/converters/base.py b/src/undate/converters/base.py index 775c550..fe7456b 100644 --- a/src/undate/converters/base.py +++ b/src/undate/converters/base.py @@ -128,7 +128,6 @@ def subclasses(cls) -> set[Type["BaseDateConverter"]]: all_subclasses = set(direct_subclasses) # recurse to find nested subclasses for subc in direct_subclasses: - # print(f"class subclasses: {subc.name} {subc.subclasses()}") all_subclasses |= subc.subclasses() # omit the calendar converter base class, which is not itself a converter From 43d101162c5e83d178a844f36f15f9a1f3e1bca2 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 7 Feb 2025 15:04:16 -0500 Subject: [PATCH 49/77] Try using uv for unit test workflow --- .github/workflows/unit_tests.yml | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index 17a1c7a..67e1491 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -27,23 +27,29 @@ jobs: steps: - uses: actions/checkout@v3 + + # use github python action instead of uv to take advantage of caching - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 with: python-version: ${{ matrix.python }} cache: 'pip' cache-dependency-path: '**/pyproject.toml' + + - name: Install uv + uses: astral-sh/setup-uv@v5 + - name: Install package with dependencies - run: pip install -e ".[test]" + run: uv sync --all-extras --dev # for all versions but the one we use for code coverage, run normally - name: Run unit tests normally - run: pytest + run: uv run pytest if: ${{ matrix.python != env.COV_PYTHON_VERSION }} # run code coverage in one 
version only - name: Run unit tests with code coverage reporting - run: pytest --cov=undate + run: uv run pytest --cov=undate if: ${{ matrix.python == env.COV_PYTHON_VERSION }} - name: Upload test coverage to Codecov uses: codecov/codecov-action@v3 From e58e50713e810b7613b2fb14c517c610d1ec65cf Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 7 Feb 2025 15:07:27 -0500 Subject: [PATCH 50/77] Remove pip caching; configure uv cache --- .github/workflows/unit_tests.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index 67e1491..9b73f46 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -29,15 +29,16 @@ jobs: - uses: actions/checkout@v3 # use github python action instead of uv to take advantage of caching - - name: Set up Python ${{ matrix.python-version }} + - name: Set up Python ${{ matrix.python }} uses: actions/setup-python@v4 with: python-version: ${{ matrix.python }} - cache: 'pip' - cache-dependency-path: '**/pyproject.toml' - name: Install uv uses: astral-sh/setup-uv@v5 + with: + enable-cache: true + cache-dependency-glob: "pyproject.toml" - name: Install package with dependencies run: uv sync --all-extras --dev From e8cdf62220704fa120514a5475658e546a4f592c Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 7 Feb 2025 15:18:02 -0500 Subject: [PATCH 51/77] Try using uv for check workflow --- .github/workflows/check.yml | 21 +++++++++++++-------- .github/workflows/unit_tests.yml | 4 ++-- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml index 3f3b4e7..5436854 100644 --- a/.github/workflows/check.yml +++ b/.github/workflows/check.yml @@ -17,23 +17,28 @@ jobs: uses: actions/setup-python@v5 with: python-version: "3.12" - cache: 'pip' - cache-dependency-path: '**/pyproject.toml' - - name: Install package with development dependencies - run: pip install -e ".[dev]" 
+ + - name: Install uv + uses: astral-sh/setup-uv@v5 + with: + enable-cache: true + cache-dependency-glob: "pyproject.toml" + + - name: Install package with dependencies + run: uv sync --all-extras --dev # check with ruff - name: Run ruff - run: ruff check + run: uv run ruff check # check docs build - name: Check that documentation builds with no errors or warnings - run: sphinx-build docs docs/_build --fail-on-warning + run: uv run sphinx-build docs docs/_build --fail-on-warning # check types with mypy - name: Check types in python src directory; install needed types - run: mypy --install-types --non-interactive src + run: uv run mypy --install-types --non-interactive src # use treon to make sure that example notebooks run - name: Check jupyter notebooks with treon - run: treon + run: uv run treon diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index 9b73f46..c2f115d 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -26,11 +26,11 @@ jobs: working-directory: . 
steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 # use github python action instead of uv to take advantage of caching - name: Set up Python ${{ matrix.python }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python }} From d56115cdb3e46f9eb9c90b8b359c3aa7f88c23b8 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 7 Feb 2025 15:20:57 -0500 Subject: [PATCH 52/77] Some example notebooks require jupyter & pandas --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index f1ad9a7..2c4c3d2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,6 +50,7 @@ classifiers = [ [project.optional-dependencies] docs = ["sphinx>=7.0.0", "alabaster", "myst-parser", "myst-parser[linkify]"] test = ["pytest>=7.2", "pytest-ordering", "pytest-cov"] +examples = ["jupyterlab", "pandas"] dev = [ "ruff", "pre-commit>=2.20.0", From a0f4b0553b528c6eb5b8da3d9ac6b76c1ea8031b Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 7 Feb 2025 15:34:28 -0500 Subject: [PATCH 53/77] Install only the necessary dependencies for test/check workflow --- .github/workflows/check.yml | 4 ++-- .github/workflows/unit_tests.yml | 4 ++-- pyproject.toml | 7 +++---- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml index 5436854..ae450b4 100644 --- a/.github/workflows/check.yml +++ b/.github/workflows/check.yml @@ -24,8 +24,8 @@ jobs: enable-cache: true cache-dependency-glob: "pyproject.toml" - - name: Install package with dependencies - run: uv sync --all-extras --dev + - name: Install package with check dependencies + run: uv sync --extra check # check with ruff - name: Run ruff diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index c2f115d..d41b6ce 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -40,8 +40,8 @@ jobs: enable-cache: true cache-dependency-glob: 
"pyproject.toml" - - name: Install package with dependencies - run: uv sync --all-extras --dev + - name: Install package with dev and test dependencies + run: uv sync --extra test # for all versions but the one we use for code coverage, run normally - name: Run unit tests normally diff --git a/pyproject.toml b/pyproject.toml index 2c4c3d2..0f69d09 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,15 +50,14 @@ classifiers = [ [project.optional-dependencies] docs = ["sphinx>=7.0.0", "alabaster", "myst-parser", "myst-parser[linkify]"] test = ["pytest>=7.2", "pytest-ordering", "pytest-cov"] -examples = ["jupyterlab", "pandas"] +notebooks = ["jupyterlab", "pandas", "treon"] +check = ["undate[docs]", "undate[notebooks]", "mypy", "ruff"] dev = [ - "ruff", "pre-commit>=2.20.0", "twine", "wheel", "build", - "mypy", - "treon", + "undate[check]", "undate[docs]", "undate[test]", ] From 0fefcec38abe3e850acc18557868d7a34ac8e5bb Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 20 Feb 2025 16:46:57 -0500 Subject: [PATCH 54/77] Move interval object and tests into separate files --- docs/undate/core.rst | 7 +- src/undate/__init__.py | 5 +- .../converters/calendars/hebrew/converter.py | 2 +- .../calendars/hebrew/transformer.py | 2 +- .../converters/calendars/hijri/converter.py | 2 +- .../converters/calendars/hijri/transformer.py | 2 +- src/undate/converters/edtf/converter.py | 3 +- src/undate/converters/edtf/transformer.py | 2 +- src/undate/converters/iso8601.py | 2 +- src/undate/interval.py | 125 +++++++++++++++ src/undate/undate.py | 107 +------------ .../edtf/test_edtf_transformer.py | 3 +- tests/test_converters/test_edtf.py | 2 +- tests/test_converters/test_iso8601.py | 2 +- tests/test_interval.py | 145 ++++++++++++++++++ tests/test_undate.py | 119 +------------- 16 files changed, 299 insertions(+), 231 deletions(-) create mode 100644 src/undate/interval.py create mode 100644 tests/test_interval.py diff --git a/docs/undate/core.rst b/docs/undate/core.rst index 
e7b6b4b..4cc3e6b 100644 --- a/docs/undate/core.rst +++ b/docs/undate/core.rst @@ -1,13 +1,16 @@ Undate objects ============== -undates and undate intervals +dates, intervals, and calendar ------------------------------ .. autoclass:: undate.undate.Undate :members: -.. autoclass:: undate.undate.UndateInterval +.. autoclass:: undate.undate.Calendar + :members: + +.. autoclass:: undate.interval.UndateInterval :members: date, timedelta, and date precision diff --git a/src/undate/__init__.py b/src/undate/__init__.py index 290f83f..00cedc3 100644 --- a/src/undate/__init__.py +++ b/src/undate/__init__.py @@ -1,6 +1,7 @@ __version__ = "0.4.0.dev0" from undate.date import DatePrecision -from undate.undate import Undate, UndateInterval +from undate.undate import Undate, Calendar +from undate.interval import UndateInterval -__all__ = ["Undate", "UndateInterval", "DatePrecision", "__version__"] +__all__ = ["Undate", "UndateInterval", "Calendar", "DatePrecision", "__version__"] diff --git a/src/undate/converters/calendars/hebrew/converter.py b/src/undate/converters/calendars/hebrew/converter.py index b8b4620..d540021 100644 --- a/src/undate/converters/calendars/hebrew/converter.py +++ b/src/undate/converters/calendars/hebrew/converter.py @@ -3,10 +3,10 @@ from convertdate import hebrew # type: ignore from lark.exceptions import UnexpectedCharacters +from undate import Undate, UndateInterval from undate.converters.base import BaseCalendarConverter from undate.converters.calendars.hebrew.parser import hebrew_parser from undate.converters.calendars.hebrew.transformer import HebrewDateTransformer -from undate.undate import Undate, UndateInterval class HebrewDateConverter(BaseCalendarConverter): diff --git a/src/undate/converters/calendars/hebrew/transformer.py b/src/undate/converters/calendars/hebrew/transformer.py index a6d2888..48e8b20 100644 --- a/src/undate/converters/calendars/hebrew/transformer.py +++ b/src/undate/converters/calendars/hebrew/transformer.py @@ -1,6 +1,6 @@ 
from lark import Transformer, Tree -from undate.undate import Undate, Calendar +from undate import Undate, Calendar class HebrewUndate(Undate): diff --git a/src/undate/converters/calendars/hijri/converter.py b/src/undate/converters/calendars/hijri/converter.py index b4b81b1..12a04d8 100644 --- a/src/undate/converters/calendars/hijri/converter.py +++ b/src/undate/converters/calendars/hijri/converter.py @@ -3,10 +3,10 @@ from convertdate import islamic # type: ignore from lark.exceptions import UnexpectedCharacters +from undate import Undate, UndateInterval from undate.converters.base import BaseCalendarConverter from undate.converters.calendars.hijri.parser import hijri_parser from undate.converters.calendars.hijri.transformer import HijriDateTransformer -from undate.undate import Undate, UndateInterval class HijriDateConverter(BaseCalendarConverter): diff --git a/src/undate/converters/calendars/hijri/transformer.py b/src/undate/converters/calendars/hijri/transformer.py index b575df9..8b78b2c 100644 --- a/src/undate/converters/calendars/hijri/transformer.py +++ b/src/undate/converters/calendars/hijri/transformer.py @@ -1,6 +1,6 @@ from lark import Transformer, Tree -from undate.undate import Undate, Calendar +from undate import Undate, Calendar class HijriUndate(Undate): diff --git a/src/undate/converters/edtf/converter.py b/src/undate/converters/edtf/converter.py index 95a1364..d0b742f 100644 --- a/src/undate/converters/edtf/converter.py +++ b/src/undate/converters/edtf/converter.py @@ -2,11 +2,12 @@ from lark.exceptions import UnexpectedCharacters +from undate import Undate, UndateInterval from undate.converters.base import BaseDateConverter from undate.converters.edtf.parser import edtf_parser from undate.converters.edtf.transformer import EDTFTransformer from undate.date import DatePrecision -from undate.undate import Undate, UndateInterval + #: character for unspecified digits EDTF_UNSPECIFIED_DIGIT: str = "X" diff --git 
a/src/undate/converters/edtf/transformer.py b/src/undate/converters/edtf/transformer.py index d5bcfcb..0b1de76 100644 --- a/src/undate/converters/edtf/transformer.py +++ b/src/undate/converters/edtf/transformer.py @@ -1,6 +1,6 @@ from lark import Token, Transformer, Tree -from undate.undate import Undate, UndateInterval +from undate import Undate, UndateInterval class EDTFTransformer(Transformer): diff --git a/src/undate/converters/iso8601.py b/src/undate/converters/iso8601.py index 09399eb..4f05b69 100644 --- a/src/undate/converters/iso8601.py +++ b/src/undate/converters/iso8601.py @@ -1,7 +1,7 @@ from typing import Dict, List, Union +from undate import Undate, UndateInterval from undate.converters.base import BaseDateConverter -from undate.undate import Undate, UndateInterval class ISO8601DateFormat(BaseDateConverter): diff --git a/src/undate/interval.py b/src/undate/interval.py new file mode 100644 index 0000000..787aa5a --- /dev/null +++ b/src/undate/interval.py @@ -0,0 +1,125 @@ +import datetime + +# Pre 3.10 requires Union for multiple types, e.g. Union[int, None] instead of int | None +from typing import Optional, Union + + +from undate import Undate +from undate.date import ONE_DAY, ONE_YEAR, Timedelta +from undate.converters.base import BaseDateConverter + + +class UndateInterval: + """A date range between two uncertain dates. + + :param earliest: Earliest undate + :type earliest: `undate.Undate` + :param latest: Latest undate + :type latest: `undate.Undate` + :param label: A string to label a specific undate interval, similar to labels of `undate.Undate`. 
+ :type label: `str` + """ + + # date range between two undates + earliest: Union[Undate, None] + latest: Union[Undate, None] + label: Union[str, None] + + # TODO: let's think about adding an optional precision / length /size field + # using DatePrecision + + def __init__( + self, + earliest: Optional[Undate] = None, + latest: Optional[Undate] = None, + label: Optional[str] = None, + ): + # for now, assume takes two undate objects; + # support conversion from datetime + if earliest and not isinstance(earliest, Undate): + # NOTE: some overlap with Undate._comparison_type method + # maybe support conversion from other formats later + if isinstance(earliest, datetime.date): + earliest = Undate.from_datetime_date(earliest) + else: + raise ValueError( + f"earliest date {earliest} cannot be converted to Undate" + ) + if latest and not isinstance(latest, Undate): + if isinstance(latest, datetime.date): + latest = Undate.from_datetime_date(latest) + else: + raise ValueError(f"latest date {latest} cannot be converted to Undate") + + # check that the interval is valid + if latest and earliest and latest <= earliest: + raise ValueError(f"invalid interval {earliest}-{latest}") + + self.earliest = earliest + self.latest = latest + self.label = label + + def __str__(self) -> str: + # using EDTF syntax for open ranges + return "%s/%s" % (self.earliest or "..", self.latest or "") + + def format(self, format) -> str: + """format this undate interval as a string using the specified format; + for now, only supports named converters""" + converter_cls = BaseDateConverter.available_converters().get(format, None) + print(f"converter_cls == {converter_cls}") + if converter_cls: + return converter_cls().to_string(self) + + raise ValueError(f"Unsupported format '{format}'") + + def __repr__(self) -> str: + if self.label: + return "" % (self.label, self) + return "" % self + + def __eq__(self, other) -> bool: + # consider interval equal if both dates are equal + return self.earliest == 
other.earliest and self.latest == other.latest + + def duration(self) -> Timedelta: + """Calculate the duration between two undates. + Note that durations are inclusive (i.e., a closed interval), and + include both the earliest and latest date rather than the difference + between them. + + :returns: A duration + :rtype: Timedelta + """ + # what is the duration of this date range? + + # if range is open-ended, can't calculate + if self.earliest is None or self.latest is None: + return NotImplemented + + # if both years are known, subtract end of range from beginning of start + if self.latest.known_year and self.earliest.known_year: + return self.latest.latest - self.earliest.earliest + ONE_DAY + + # if neither year is known... + elif not self.latest.known_year and not self.earliest.known_year: + # under what circumstances can we assume that if both years + # are unknown the dates are in the same year or sequential? + duration = self.latest.earliest - self.earliest.earliest + # if we get a negative, we've wrapped from end of one year + # to the beginning of the next; + # recalculate assuming second date is in the subsequent year + if duration.days < 0: + end = self.latest.earliest + ONE_YEAR + duration = end - self.earliest.earliest + + # add the additional day *after* checking for a negative + # or after recalculating with adjusted year + duration += ONE_DAY + + return duration + + else: + # is there any meaningful way to calculate duration + # if one year is known and the other is not? 
+ raise NotImplementedError diff --git a/src/undate/undate.py b/src/undate/undate.py index c12d022..2008914 100644 --- a/src/undate/undate.py +++ b/src/undate/undate.py @@ -1,8 +1,12 @@ -import datetime -import re +from __future__ import annotations +import datetime from enum import auto +import re +from typing import TYPE_CHECKING +if TYPE_CHECKING: + from undate.interval import UndateInterval try: # StrEnum was only added in python 3.11 from enum import StrEnum @@ -14,7 +18,7 @@ from typing import Dict, Optional, Union from undate.converters.base import BaseDateConverter -from undate.date import ONE_DAY, ONE_MONTH_MAX, ONE_YEAR, Date, DatePrecision, Timedelta +from undate.date import ONE_DAY, ONE_MONTH_MAX, Date, DatePrecision, Timedelta class Calendar(StrEnum): @@ -218,7 +222,7 @@ def __repr__(self) -> str: return f"" @classmethod - def parse(cls, date_string, format) -> Union["Undate", "UndateInterval"]: + def parse(cls, date_string, format) -> Union["Undate", UndateInterval]: """parse a string to an undate or undate interval using the specified format; for now, only supports named converters""" converter_cls = BaseDateConverter.available_converters().get(format, None) @@ -487,98 +491,3 @@ def _missing_digit_minmax( min_val = int("".join(new_min_val)) max_val = int("".join(new_max_val)) return (min_val, max_val) - - -class UndateInterval: - """A date range between two uncertain dates. - - :param earliest: Earliest undate - :type earliest: `undate.Undate` - :param latest: Latest undate - :type latest: `undate.Undate` - :param label: A string to label a specific undate interval, similar to labels of `undate.Undate`. 
- :type label: `str` - """ - - # date range between two undates - earliest: Union[Undate, None] - latest: Union[Undate, None] - label: Union[str, None] - - # TODO: let's think about adding an optional precision / length /size field - # using DatePrecision - - def __init__( - self, - earliest: Optional[Undate] = None, - latest: Optional[Undate] = None, - label: Optional[str] = None, - ): - # for now, assume takes two undate objects - self.earliest = earliest - self.latest = latest - self.label = label - - def __str__(self) -> str: - # using EDTF syntax for open ranges - return "%s/%s" % (self.earliest or "..", self.latest or "") - - def format(self, format) -> str: - """format this undate interval as a string using the specified format; - for now, only supports named converters""" - converter_cls = BaseDateConverter.available_converters().get(format, None) - if converter_cls: - return converter_cls().to_string(self) - - raise ValueError(f"Unsupported format '{format}'") - - def __repr__(self) -> str: - if self.label: - return "" % (self.label, self) - return "" % self - - def __eq__(self, other) -> bool: - # consider interval equal if both dates are equal - return self.earliest == other.earliest and self.latest == other.latest - - def duration(self) -> Timedelta: - """Calculate the duration between two undates. - Note that durations are inclusive (i.e., a closed interval), and - include both the earliest and latest date rather than the difference - between them. - - :returns: A duration - :rtype: Timedelta - """ - # what is the duration of this date range? - - # if range is open-ended, can't calculate - if self.earliest is None or self.latest is None: - return NotImplemented - - # if both years are known, subtract end of range from beginning of start - if self.latest.known_year and self.earliest.known_year: - return self.latest.latest - self.earliest.earliest + ONE_DAY - - # if neither year is known... 
- elif not self.latest.known_year and not self.earliest.known_year: - # under what circumstances can we assume that if both years - # are unknown the dates are in the same year or sequential? - duration = self.latest.earliest - self.earliest.earliest - # if we get a negative, we've wrapped from end of one year - # to the beginning of the next; - # recalculate assuming second date is in the subsequent year - if duration.days < 0: - end = self.latest.earliest + ONE_YEAR - duration = end - self.earliest.earliest - - # add the additional day *after* checking for a negative - # or after recalculating with adjusted year - duration += ONE_DAY - - return duration - - else: - # is there any meaningful way to calculate duration - # if one year is known and the other is not? - raise NotImplementedError diff --git a/tests/test_converters/edtf/test_edtf_transformer.py b/tests/test_converters/edtf/test_edtf_transformer.py index 66488f6..3e82eb1 100644 --- a/tests/test_converters/edtf/test_edtf_transformer.py +++ b/tests/test_converters/edtf/test_edtf_transformer.py @@ -1,7 +1,8 @@ import pytest + +from undate import Undate, UndateInterval from undate.converters.edtf.parser import edtf_parser from undate.converters.edtf.transformer import EDTFTransformer -from undate.undate import Undate, UndateInterval # for now, just test that valid dates can be parsed diff --git a/tests/test_converters/test_edtf.py b/tests/test_converters/test_edtf.py index f159970..5210e98 100644 --- a/tests/test_converters/test_edtf.py +++ b/tests/test_converters/test_edtf.py @@ -1,7 +1,7 @@ import pytest from undate.converters.edtf import EDTFDateConverter from undate.date import DatePrecision -from undate.undate import Undate, UndateInterval +from undate import Undate, UndateInterval class TestEDTFDateConverter: diff --git a/tests/test_converters/test_iso8601.py b/tests/test_converters/test_iso8601.py index 73f645e..519eeb2 100644 --- a/tests/test_converters/test_iso8601.py +++ 
b/tests/test_converters/test_iso8601.py @@ -1,5 +1,5 @@ +from undate import Undate, UndateInterval from undate.converters.iso8601 import ISO8601DateFormat -from undate.undate import Undate, UndateInterval class TestISO8601DateFormat: diff --git a/tests/test_interval.py b/tests/test_interval.py new file mode 100644 index 0000000..dea8710 --- /dev/null +++ b/tests/test_interval.py @@ -0,0 +1,145 @@ +import calendar +import datetime + +import pytest + +from undate import Undate, UndateInterval +from undate.date import Timedelta + + +class TestUndateInterval: + def test_init_types(self): + # datetime.date - autoconvert + interval = UndateInterval(datetime.date(2022, 1, 1), None) + assert isinstance(interval.earliest, Undate) + interval = UndateInterval(None, datetime.date(2022, 1, 1)) + assert isinstance(interval.latest, Undate) + + # unsupported type should raise exception + with pytest.raises( + ValueError, match="earliest date 2022 cannot be converted to Undate" + ): + UndateInterval(2022, None) + + with pytest.raises( + ValueError, match="latest date 1982 cannot be converted to Undate" + ): + UndateInterval(None, "1982") + + def test_init_validation(self): + with pytest.raises(ValueError, match="invalid interval"): + UndateInterval(Undate(2020), Undate(1010)) + + def test_str(self): + # 2022 - 2023 + assert str(UndateInterval(Undate(2022), Undate(2023))) == "2022/2023" + # 2022 - 2023-05 + assert str(UndateInterval(Undate(2022), Undate(2023, 5))) == "2022/2023-05" + # 2022-11-01 to 2022-11-07 + assert ( + str(UndateInterval(Undate(2022, 11, 1), Undate(2023, 11, 7))) + == "2022-11-01/2023-11-07" + ) + + def test_format(self): + interval = UndateInterval(Undate(2000), Undate(2001)) + assert interval.format("EDTF") == "2000/2001" + assert interval.format("ISO8601") == "2000/2001" + + # Open-ended intervals + open_start = UndateInterval(latest=Undate(2000)) + assert open_start.format("EDTF") == "../2000" + assert open_start.format("ISO8601") == "/2000" + + open_end = 
UndateInterval(earliest=Undate(2000)) + assert open_end.format("EDTF") == "2000/.." + assert open_end.format("ISO8601") == "2000/" + + def test_repr(self): + assert ( + repr(UndateInterval(Undate(2022), Undate(2023))) + == "" + ) + assert ( + repr(UndateInterval(Undate(2022), Undate(2023), label="Fancy Epoch")) + == "" + ) + + def test_str_open_range(self): + # 900 - + assert str(UndateInterval(Undate(900))) == "0900/" + # - 1900 + assert str(UndateInterval(latest=Undate(1900))) == "../1900" + # - 1900-12 + assert str(UndateInterval(latest=Undate(1900, 12))) == "../1900-12" + + def test_eq(self): + assert UndateInterval(Undate(2022), Undate(2023)) == UndateInterval( + Undate(2022), Undate(2023) + ) + assert UndateInterval(Undate(2022), Undate(2023, 5)) == UndateInterval( + Undate(2022), Undate(2023, 5) + ) + assert UndateInterval(Undate(2022, 5)) == UndateInterval(Undate(2022, 5)) + + def test_not_eq(self): + assert UndateInterval(Undate(2022), Undate(2023)) != UndateInterval( + Undate(2022), Undate(2024) + ) + assert UndateInterval(Undate(2022), Undate(2023, 5)) != UndateInterval( + Undate(2022), Undate(2023, 6) + ) + assert UndateInterval(Undate(2022), Undate(2023, 5)) != UndateInterval( + Undate(2022), Undate(2023) + ) + assert UndateInterval(Undate(2022, 5)) != UndateInterval(Undate(2022, 6)) + + def test_min_year_non_leapyear(self): + assert not calendar.isleap(Undate.MIN_ALLOWABLE_YEAR) + + def test_duration(self): + week_duration = UndateInterval( + Undate(2022, 11, 1), Undate(2022, 11, 7) + ).duration() + assert isinstance(week_duration, Timedelta) + assert week_duration.days == 7 + + twomonths = UndateInterval(Undate(2022, 11), Undate(2022, 12)).duration() + # november - december = 30 days + 31 days + assert twomonths.days == 30 + 31 + + twoyears = UndateInterval(Undate(2021), Undate(2022)).duration() + assert twoyears.days == 365 * 2 + + # special case: month/day with no year (assumes same year) + week_noyear_duration = UndateInterval( + Undate(None, 11, 
1), Undate(None, 11, 7) + ).duration() + assert week_noyear_duration.days == 7 + # special case 2: month/day with no year, wrapping from december to january + # (assumes sequential years) + month_noyear_duration = UndateInterval( + Undate(None, 12, 1), Undate(None, 1, 1) + ).duration() + assert month_noyear_duration.days == 32 + + # real world test cases from Shakespeare and Company Project data; + # second date is a year minus one day in the future + month_noyear_duration = UndateInterval( + Undate(None, 6, 7), Undate(None, 6, 6) + ).duration() + assert month_noyear_duration.days == 365 + + # durations that span february in unknown years should assume + # non-leap years + jan_march_duration = UndateInterval( + Undate(None, 2, 28), Undate(None, 3, 1) + ).duration() + assert jan_march_duration.days == 2 + + # duration is not supported for open-ended intervals + assert UndateInterval(Undate(2000), None).duration() == NotImplemented + + # one year set and the other not currently raises not implemented error + with pytest.raises(NotImplementedError): + UndateInterval(Undate(2000), Undate(month=10)).duration() diff --git a/tests/test_undate.py b/tests/test_undate.py index ecf0777..8f8a5c8 100644 --- a/tests/test_undate.py +++ b/tests/test_undate.py @@ -1,11 +1,10 @@ -import calendar from datetime import date import pytest +from undate import Undate, UndateInterval, Calendar from undate.converters.base import BaseCalendarConverter from undate.date import DatePrecision, Timedelta -from undate.undate import Undate, UndateInterval, Calendar class TestUndate: @@ -452,122 +451,6 @@ def test_format(self): Undate(1984).format("%Y-%m") -class TestUndateInterval: - def test_str(self): - # 2022 - 2023 - assert str(UndateInterval(Undate(2022), Undate(2023))) == "2022/2023" - # 2022 - 2023-05 - assert str(UndateInterval(Undate(2022), Undate(2023, 5))) == "2022/2023-05" - # 2022-11-01 to 2022-11-07 - assert ( - str(UndateInterval(Undate(2022, 11, 1), Undate(2023, 11, 7))) - == 
"2022-11-01/2023-11-07" - ) - - def test_format(self): - interval = UndateInterval(Undate(2000), Undate(2001)) - assert interval.format("EDTF") == "2000/2001" - assert interval.format("ISO8601") == "2000/2001" - - # Open-ended intervals - open_start = UndateInterval(latest=Undate(2000)) - assert open_start.format("EDTF") == "../2000" - assert open_start.format("ISO8601") == "/2000" - - open_end = UndateInterval(earliest=Undate(2000)) - assert open_end.format("EDTF") == "2000/.." - assert open_end.format("ISO8601") == "2000/" - - def test_repr(self): - assert ( - repr(UndateInterval(Undate(2022), Undate(2023))) - == "" - ) - assert ( - repr(UndateInterval(Undate(2022), Undate(2023), label="Fancy Epoch")) - == "" - ) - - def test_str_open_range(self): - # 900 - - assert str(UndateInterval(Undate(900))) == "0900/" - # - 1900 - assert str(UndateInterval(latest=Undate(1900))) == "../1900" - # - 1900-12 - assert str(UndateInterval(latest=Undate(1900, 12))) == "../1900-12" - - def test_eq(self): - assert UndateInterval(Undate(2022), Undate(2023)) == UndateInterval( - Undate(2022), Undate(2023) - ) - assert UndateInterval(Undate(2022), Undate(2023, 5)) == UndateInterval( - Undate(2022), Undate(2023, 5) - ) - assert UndateInterval(Undate(2022, 5)) == UndateInterval(Undate(2022, 5)) - - def test_not_eq(self): - assert UndateInterval(Undate(2022), Undate(2023)) != UndateInterval( - Undate(2022), Undate(2024) - ) - assert UndateInterval(Undate(2022), Undate(2023, 5)) != UndateInterval( - Undate(2022), Undate(2023, 6) - ) - assert UndateInterval(Undate(2022), Undate(2023, 5)) != UndateInterval( - Undate(2022), Undate(2023) - ) - assert UndateInterval(Undate(2022, 5)) != UndateInterval(Undate(2022, 6)) - - def test_min_year_non_leapyear(self): - assert not calendar.isleap(Undate.MIN_ALLOWABLE_YEAR) - - def test_duration(self): - week_duration = UndateInterval( - Undate(2022, 11, 1), Undate(2022, 11, 7) - ).duration() - assert isinstance(week_duration, Timedelta) - assert 
week_duration.days == 7 - - twomonths = UndateInterval(Undate(2022, 11), Undate(2022, 12)).duration() - # november - december = 30 days + 31 days - assert twomonths.days == 30 + 31 - - twoyears = UndateInterval(Undate(2021), Undate(2022)).duration() - assert twoyears.days == 365 * 2 - - # special case: month/day with no year (assumes same year) - week_noyear_duration = UndateInterval( - Undate(None, 11, 1), Undate(None, 11, 7) - ).duration() - assert week_noyear_duration.days == 7 - # special case 2: month/day with no year, wrapping from december to january - # (assumes sequential years) - month_noyear_duration = UndateInterval( - Undate(None, 12, 1), Undate(None, 1, 1) - ).duration() - assert month_noyear_duration.days == 32 - - # real world test cases from Shakespeare and Company Project data; - # second date is a year minus one day in the future - month_noyear_duration = UndateInterval( - Undate(None, 6, 7), Undate(None, 6, 6) - ).duration() - assert month_noyear_duration.days == 365 - - # durations that span february in unknown years should assume - # non-leap years - jan_march_duration = UndateInterval( - Undate(None, 2, 28), Undate(None, 3, 1) - ).duration() - assert jan_march_duration.days == 2 - - # duration is not supported for open-ended intervals - assert UndateInterval(Undate(2000), None).duration() == NotImplemented - - # one year set and the other not currently raises not implemented error - with pytest.raises(NotImplementedError): - UndateInterval(Undate(2000), Undate()).duration() - - def test_calendar_get_converter(): # ensure we can retrieve a calendar converter for each # calendar named in our calendar enum From 60aefc41426c66cdbfc3482361efaee2df3df11b Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 20 Feb 2025 17:36:22 -0500 Subject: [PATCH 55/77] Update example notebook to import UndateInterval from new location --- examples/notebooks/shxco_partial_date_durations.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/examples/notebooks/shxco_partial_date_durations.ipynb b/examples/notebooks/shxco_partial_date_durations.ipynb index 9e291f9..486981a 100644 --- a/examples/notebooks/shxco_partial_date_durations.ipynb +++ b/examples/notebooks/shxco_partial_date_durations.ipynb @@ -316,14 +316,14 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 1, "metadata": { "id": "y_MqgrQW64uI" }, "outputs": [], "source": [ + "from undate import UndateInterval\n", "from undate.date import ONE_DAY\n", - "from undate.undate import UndateInterval\n", "from undate.converters.iso8601 import ISO8601DateFormat\n", "\n", "def undate_duration(start_date, end_date):\n", From 1607b98806acae946714262ddbdba6bd016e5086 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Wed, 5 Mar 2025 18:42:41 -0500 Subject: [PATCH 56/77] Remove debug print statement flagged by code rabbit --- src/undate/interval.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/undate/interval.py b/src/undate/interval.py index 787aa5a..33ec200 100644 --- a/src/undate/interval.py +++ b/src/undate/interval.py @@ -67,7 +67,6 @@ def format(self, format) -> str: """format this undate interval as a string using the specified format; for now, only supports named converters""" converter_cls = BaseDateConverter.available_converters().get(format, None) - print(f"converter_cls == {converter_cls}") if converter_cls: return converter_cls().to_string(self) From ffc1993bbf5c47a871c32df419f0018b853ed373 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 20 Feb 2025 16:23:53 -0500 Subject: [PATCH 57/77] Add validation and type conversion to interval init --- src/undate/undate.py | 4 ++++ tests/test_converters/test_edtf.py | 8 ++++---- tests/test_undate.py | 12 ++++++++---- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/src/undate/undate.py b/src/undate/undate.py index 2008914..f2d5300 100644 --- a/src/undate/undate.py +++ b/src/undate/undate.py @@ -72,6 +72,10 @@ def __init__( label: Optional[str] = None, 
calendar: Optional[Union[str, Calendar]] = None, ): + # everything is optional but something is required + if all([val is None for val in [year, month, day]]): + raise ValueError("At least one of year, month, or day must be specified") + # keep track of initial values and which values are known # TODO: add validation: if str, must be expected length self.initial_values: Dict[str, Optional[Union[int, str]]] = { diff --git a/tests/test_converters/test_edtf.py b/tests/test_converters/test_edtf.py index 5210e98..5c98446 100644 --- a/tests/test_converters/test_edtf.py +++ b/tests/test_converters/test_edtf.py @@ -64,8 +64,8 @@ def test_to_string(self): # if converter can't generate a string for the date, # it should return a value error - empty_undate = Undate() - empty_undate.precision = DatePrecision.DECADE - with pytest.raises(ValueError): - EDTFDateConverter().to_string(empty_undate) + # empty_undate = Undate() # undate with no date information no longer supported + # empty_undate.precision = DatePrecision.DECADE + # with pytest.raises(ValueError): + # EDTFDateConverter().to_string(empty_undate) # TODO: override missing digit and confirm replacement diff --git a/tests/test_undate.py b/tests/test_undate.py index 8f8a5c8..46fe973 100644 --- a/tests/test_undate.py +++ b/tests/test_undate.py @@ -132,7 +132,10 @@ def test_calendar(self): def test_init_invalid(self): with pytest.raises(ValueError): - Undate("19xx") + Undate("19??") + + with pytest.raises(ValueError, match="At least one of year, month, or day"): + Undate() def test_invalid_date(self): # invalid month should raise an error @@ -156,10 +159,11 @@ def test_year_property(self): # unset year assert Undate(month=12, day=31).year == "XXXX" + # NOTE: no longer supported to inistalize undate with no date information # force method to hit conditional for date precision - some_century = Undate() - some_century.precision = DatePrecision.CENTURY - assert some_century.year is None + # some_century = Undate() + # 
some_century.precision = DatePrecision.CENTURY + # assert some_century.year is None def test_month_property(self): # one, two digit month From 710c66a9a50c16d015bb412eceea29162b837865 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 20 Feb 2025 17:03:31 -0500 Subject: [PATCH 58/77] Implement & test an intersection method for UndateInterval --- src/undate/interval.py | 24 ++++++++++++++++++++++++ tests/test_interval.py | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) diff --git a/src/undate/interval.py b/src/undate/interval.py index 33ec200..eb91297 100644 --- a/src/undate/interval.py +++ b/src/undate/interval.py @@ -122,3 +122,27 @@ def duration(self) -> Timedelta: # is there any meaningful way to calculate duration # if one year is known and the other is not? raise NotImplementedError + + def intersection(self, other: "UndateInterval") -> Optional["UndateInterval"]: + """Determine the intersection or overlap between two :class:`UndateInterval` + objects and return a new interval, or None if no overlap. 
+ """ + try: + # when both values are defined, return the inner bounds; + # if not, return whichever is not None, or None + earliest = ( + max(self.earliest, other.earliest) + if self.earliest and other.earliest + else self.earliest or other.earliest + ) + latest = ( + min(self.latest, other.latest) + if self.latest and other.latest + else self.latest or other.latest + ) + + # if this results in an invalid interval, initialization + # will throw an exception + return UndateInterval(earliest, latest) + except ValueError: + return None diff --git a/tests/test_interval.py b/tests/test_interval.py index dea8710..3d49179 100644 --- a/tests/test_interval.py +++ b/tests/test_interval.py @@ -143,3 +143,35 @@ def test_duration(self): # one year set and the other not currently raises not implemented error with pytest.raises(NotImplementedError): UndateInterval(Undate(2000), Undate(month=10)).duration() + + def test_intersection(self): + century11th = UndateInterval(Undate(1001), Undate(1100)) + century20th = UndateInterval(Undate(1901), Undate(2000)) + # no intersection + assert century11th.intersection(century20th) is None + # should work in either direction + assert century20th.intersection(century11th) is None + + decade1990s = UndateInterval(Undate(1990), Undate(1999)) + # intersection of an interval completely contained in another + # returns an interval equivalent to the smaller one + assert century20th.intersection(decade1990s) == decade1990s + assert decade1990s.intersection(century20th) == decade1990s + + # partial overlap + nineties_oughts = UndateInterval(Undate(1990), Undate(2009)) + assert century20th.intersection(nineties_oughts) == UndateInterval( + Undate(1990), Undate(2000) + ) + + # intersections between half open intervals + after_c11th = UndateInterval(Undate(1001), None) + assert after_c11th.intersection(century20th) == century20th + assert after_c11th.intersection(decade1990s) == decade1990s + + before_20th = UndateInterval(None, Undate(1901)) + assert 
before_20th.intersection(decade1990s) is None + assert before_20th.intersection(century11th) == century11th + assert before_20th.intersection(after_c11th) == UndateInterval( + Undate(1001), Undate(1901) + ) From 298bb19ba8911dd98a1f4b804750fa30f56196db Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 21 Feb 2025 11:38:48 -0500 Subject: [PATCH 59/77] Make conversion to undate more reusable and extensible --- src/undate/interval.py | 23 +++++++++------------ src/undate/undate.py | 45 +++++++++++++++++++++++++++--------------- tests/test_undate.py | 11 ++++++++--- 3 files changed, 46 insertions(+), 33 deletions(-) diff --git a/src/undate/interval.py b/src/undate/interval.py index eb91297..33c3046 100644 --- a/src/undate/interval.py +++ b/src/undate/interval.py @@ -1,5 +1,3 @@ -import datetime - # Pre 3.10 requires Union for multiple types, e.g. Union[int, None] instead of int | None from typing import Optional, Union @@ -34,21 +32,18 @@ def __init__( latest: Optional[Undate] = None, label: Optional[str] = None, ): - # for now, assume takes two undate objects; - # support conversion from datetime - if earliest and not isinstance(earliest, Undate): - # NOTE: some overlap with Undate._comparison_type method - # maybe support conversion from other formats later - if isinstance(earliest, datetime.date): - earliest = Undate.from_datetime_date(earliest) - else: + # takes two undate objects; allows conversion from supported types + if earliest: + try: + earliest = Undate.to_undate(earliest) + except TypeError: raise ValueError( f"earliest date {earliest} cannot be converted to Undate" ) - if latest and not isinstance(latest, Undate): - if isinstance(latest, datetime.date): - latest = Undate.from_datetime_date(latest) - else: + if latest: + try: + latest = Undate.to_undate(latest) + except TypeError: raise ValueError(f"latest date {latest} cannot be converted to Undate") # check that the interval is valid diff --git a/src/undate/undate.py b/src/undate/undate.py index 
f2d5300..1b9671e 100644 --- a/src/undate/undate.py +++ b/src/undate/undate.py @@ -2,11 +2,13 @@ import datetime from enum import auto + import re from typing import TYPE_CHECKING if TYPE_CHECKING: from undate.interval import UndateInterval + try: # StrEnum was only added in python 3.11 from enum import StrEnum @@ -246,23 +248,19 @@ def format(self, format) -> str: raise ValueError(f"Unsupported format '{format}'") - def _comparison_type(self, other: object) -> "Undate": + @classmethod + def _comparison_type(cls, other: object) -> "Undate": """Common logic for type handling in comparison methods. Converts to Undate object if possible, otherwise raises - NotImplemented error. Currently only supports conversion - from :class:`datetime.date` + NotImplementedError exception. Uses :meth:`to_undate` for conversion. """ - - # support datetime.date by converting to undate - if isinstance(other, datetime.date): - other = Undate.from_datetime_date(other) - - # recommended to support comparison with arbitrary objects - if not isinstance(other, Undate): + # convert if possible; return NotImplemented if not + try: + return cls.to_undate(other) + except TypeError: + # recommended to support comparison with arbitrary objects return NotImplemented - return other - def __eq__(self, other: object) -> bool: # Note: assumes label differences don't matter for comparing dates @@ -272,6 +270,8 @@ def __eq__(self, other: object) -> bool: other = self._comparison_type(other) if other is NotImplemented: + # return NotImplemented to indicate comparison is not supported + # with this type return NotImplemented # if both dates are fully known, then earliest/latest check @@ -363,10 +363,23 @@ def __contains__(self, other: object) -> bool: ] ) - @staticmethod - def from_datetime_date(dt_date: datetime.date): - """Initialize an :class:`Undate` object from a :class:`datetime.date`""" - return Undate(dt_date.year, dt_date.month, dt_date.day) + @classmethod + def to_undate(cls, other: object) -> 
"Undate": + """Converted arbitrary object to Undate, if possible. Raises TypeError + if conversion is not possible. + + Currently suppports: + - :class:`datetime.date` or :class:`datetime.datetime` + + """ + match other: + case Undate(): + return other + case datetime.date() | datetime.datetime(): + return Undate(other.year, other.month, other.day) + + case _: + raise TypeError(f"Conversion from {type(other)} is not supported") @property def known_year(self) -> bool: diff --git a/tests/test_undate.py b/tests/test_undate.py index 46fe973..b3ba4fe 100644 --- a/tests/test_undate.py +++ b/tests/test_undate.py @@ -1,4 +1,4 @@ -from datetime import date +from datetime import date, datetime import pytest @@ -142,11 +142,16 @@ def test_invalid_date(self): with pytest.raises(ValueError): Undate(1990, 22) - def test_from_datetime_date(self): - undate_from_date = Undate.from_datetime_date(date(2001, 3, 5)) + def test_to_undate(self): + undate_from_date = Undate.to_undate(date(2001, 3, 5)) assert isinstance(undate_from_date, Undate) assert undate_from_date == Undate(2001, 3, 5) + now = datetime.now() + undate_from_dt = Undate.to_undate(now) + assert isinstance(undate_from_dt, Undate) + assert undate_from_dt == Undate(now.year, now.month, now.day) + # test properties for accessing parts of date def test_year_property(self): # two, three, four five digit years; numeric and string From fc4f7a92e693e5e068503bbd7ced47f989560a1e Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 21 Feb 2025 11:51:16 -0500 Subject: [PATCH 60/77] Drop support for python 3.9 so we can use match/case --- .github/workflows/unit_tests.yml | 2 +- pyproject.toml | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index 17a1c7a..381b231 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -20,7 +20,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python: ["3.9", "3.10", "3.11", 
"3.12", "3.13"] + python: ["3.10", "3.11", "3.12", "3.13"] defaults: run: working-directory: . diff --git a/pyproject.toml b/pyproject.toml index f1ad9a7..8bcf839 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ name = "undate" description = "library for working with uncertain, fuzzy, or partially unknown dates and date intervals" readme = "README.md" license = { text = "Apache-2" } -requires-python = ">= 3.9" +requires-python = ">= 3.10" dynamic = ["version"] dependencies = ["lark[interegular]", "numpy", "convertdate", "strenum; python_version < '3.11'"] authors = [ @@ -31,7 +31,6 @@ keywords = [ classifiers = [ "Development Status :: 2 - Pre-Alpha", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", From a4f2e7bd322d74b2db6ef4684b8ef25f9a0f7a86 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 21 Feb 2025 12:01:53 -0500 Subject: [PATCH 61/77] Add more type checks and tests --- src/undate/interval.py | 3 +++ tests/test_interval.py | 6 ++++++ tests/test_undate.py | 4 ++++ 3 files changed, 13 insertions(+) diff --git a/src/undate/interval.py b/src/undate/interval.py index 33c3046..4472d67 100644 --- a/src/undate/interval.py +++ b/src/undate/interval.py @@ -73,6 +73,9 @@ def __repr__(self) -> str: return "" % self def __eq__(self, other) -> bool: + # currently doesn't support comparison with any other types + if not isinstance(other, UndateInterval): + return NotImplemented # consider interval equal if both dates are equal return self.earliest == other.earliest and self.latest == other.latest diff --git a/tests/test_interval.py b/tests/test_interval.py index 3d49179..254f3c7 100644 --- a/tests/test_interval.py +++ b/tests/test_interval.py @@ -82,6 +82,12 @@ def test_eq(self): ) assert UndateInterval(Undate(2022, 5)) == UndateInterval(Undate(2022, 5)) + def test_eq_type_check(self): + # doesn't 
currently support comparison with anything else + interval = UndateInterval(Undate(900)) + # returns NotIplemented if comparison with this type is not supported + assert interval.__eq__("foo") == NotImplemented + def test_not_eq(self): assert UndateInterval(Undate(2022), Undate(2023)) != UndateInterval( Undate(2022), Undate(2024) diff --git a/tests/test_undate.py b/tests/test_undate.py index b3ba4fe..a9087c2 100644 --- a/tests/test_undate.py +++ b/tests/test_undate.py @@ -152,6 +152,10 @@ def test_to_undate(self): assert isinstance(undate_from_dt, Undate) assert undate_from_dt == Undate(now.year, now.month, now.day) + # unsupported type + with pytest.raises(TypeError): + Undate.to_undate("foo") + # test properties for accessing parts of date def test_year_property(self): # two, three, four five digit years; numeric and string From b09c9fc5a354ccb9f4e172b76818a0630607ebb8 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Wed, 5 Mar 2025 19:12:42 -0500 Subject: [PATCH 62/77] Remove unused import --- tests/test_converters/test_edtf.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_converters/test_edtf.py b/tests/test_converters/test_edtf.py index 5c98446..3262e46 100644 --- a/tests/test_converters/test_edtf.py +++ b/tests/test_converters/test_edtf.py @@ -1,6 +1,5 @@ import pytest from undate.converters.edtf import EDTFDateConverter -from undate.date import DatePrecision from undate import Undate, UndateInterval From f06960ae7273d5f4257680a2675f91db7ae92f73 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 13 Mar 2025 16:11:06 -0400 Subject: [PATCH 63/77] Use raise from err on type error in interval init Based on @coderabbitai feedback --- src/undate/interval.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/undate/interval.py b/src/undate/interval.py index 4472d67..262bd5b 100644 --- a/src/undate/interval.py +++ b/src/undate/interval.py @@ -36,15 +36,17 @@ def __init__( if earliest: try: earliest = 
Undate.to_undate(earliest) - except TypeError: + except TypeError as err: raise ValueError( f"earliest date {earliest} cannot be converted to Undate" - ) + ) from err if latest: try: latest = Undate.to_undate(latest) - except TypeError: - raise ValueError(f"latest date {latest} cannot be converted to Undate") + except TypeError as err: + raise ValueError( + f"latest date {latest} cannot be converted to Undate" + ) from err # check that the interval is valid if latest and earliest and latest <= earliest: @@ -123,7 +125,7 @@ def duration(self) -> Timedelta: def intersection(self, other: "UndateInterval") -> Optional["UndateInterval"]: """Determine the intersection or overlap between two :class:`UndateInterval` - objects and return a new interval, or None if no overlap. + objects and return a new interval. Returns None if there is no overlap. """ try: # when both values are defined, return the inner bounds; From 9ee14ef2ed46fc213d5cdf14987da450a8df2381 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 13 Mar 2025 16:48:03 -0400 Subject: [PATCH 64/77] Add and test contains/in method for interval --- src/undate/interval.py | 39 ++++++++++++++++++++++++++++++++-- tests/test_interval.py | 48 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+), 2 deletions(-) diff --git a/src/undate/interval.py b/src/undate/interval.py index 262bd5b..8e6cd2f 100644 --- a/src/undate/interval.py +++ b/src/undate/interval.py @@ -23,8 +23,8 @@ class UndateInterval: latest: Union[Undate, None] label: Union[str, None] - # TODO: let's think about adding an optional precision / length /size field - # using DatePrecision + # TODO: think about adding an optional precision / length /size field + # using DatePrecision for intervals of any standard duration (decade, century) def __init__( self, @@ -123,6 +123,41 @@ def duration(self) -> Timedelta: # if one year is known and the other is not? 
raise NotImplementedError + def __contains__(self, other: object) -> bool: + """Determine if another interval or date falls within this + interval.""" + # support comparison with another interval + if isinstance(other, UndateInterval): + # if two intervals are strictly equal, don't consider + # either one as containing the other + if self == other: + return False + # otherwise compare based on earliest/latest bounds + other_earliest = other.earliest + other_latest = other.latest + else: + # otherwise, try to convert to an Undate + try: + other = Undate.to_undate(other) + other_latest = other_earliest = other + except TypeError: + # if conversion fails, then we don't support comparison + raise + + # if either bound of the current interval is None, + # then it is an open interval and we don't need to check the other value. + # if the other value is set, then check that it falls within the + # bounds of this interval + return ( + self.earliest is None + or other_earliest is not None + and other_earliest >= self.earliest + ) and ( + self.latest is None + or other_latest is not None + and other_latest <= self.latest + ) + def intersection(self, other: "UndateInterval") -> Optional["UndateInterval"]: """Determine the intersection or overlap between two :class:`UndateInterval` objects and return a new interval. Returns None if there is no overlap. 
diff --git a/tests/test_interval.py b/tests/test_interval.py index 254f3c7..4552c05 100644 --- a/tests/test_interval.py +++ b/tests/test_interval.py @@ -181,3 +181,51 @@ def test_intersection(self): assert before_20th.intersection(after_c11th) == UndateInterval( Undate(1001), Undate(1901) ) + + def test_contains(self): + century11th = UndateInterval(Undate(1001), Undate(1100)) + century20th = UndateInterval(Undate(1901), Undate(2000)) + decade1990s = UndateInterval(Undate(1990), Undate(1999)) + # an interval doesn't contain itself + for interval in [century11th, century20th, decade1990s]: + assert interval not in interval + + # checking if an interval is within another interval + assert decade1990s in century20th + assert decade1990s not in century11th + assert century11th not in decade1990s + assert century20th not in decade1990s + # a specific date can be contained by an interval + y2k = Undate(2000) + assert y2k in century20th + assert y2k not in century11th + # partially known date should work too + april_someyear = Undate("198X", 4) + assert april_someyear in century20th + assert april_someyear not in century11th + # conversion from datetime.date also works + assert datetime.date(1922, 5, 1) in century20th + # unsupported types result in a type error + with pytest.raises(TypeError): + "nineteen-eighty-four" in century20th + + # contains check with half-open intervals + after_c11th = UndateInterval(Undate(1001), None) + before_20th = UndateInterval(None, Undate(1901)) + # neither of them contains the other + assert after_c11th not in before_20th + assert before_20th not in after_c11th + # nor are they contained by a smaller range + assert after_c11th not in decade1990s + assert before_20th not in decade1990s + + # all of our previous test dates are in the 1900s, + # so they are after the 11th century and not before the 20th + for period in [decade1990s, y2k, april_someyear]: + assert period in after_c11th + assert period not in before_20th + + # fully open 
interval - is this even meaningful? + whenever = UndateInterval(None, None) + assert decade1990s in whenever + assert whenever not in whenever From 700c8348ef9f6e377ce1dab510fdfb2a82105421 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 13 Mar 2025 18:06:20 -0400 Subject: [PATCH 65/77] Address nitpicks flagged by @coderabbitai --- tests/test_interval.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_interval.py b/tests/test_interval.py index 4552c05..40713b1 100644 --- a/tests/test_interval.py +++ b/tests/test_interval.py @@ -85,7 +85,7 @@ def test_eq(self): def test_eq_type_check(self): # doesn't currently support comparison with anything else interval = UndateInterval(Undate(900)) - # returns NotIplemented if comparison with this type is not supported + # returns NotImplemented if comparison with this type is not supported assert interval.__eq__("foo") == NotImplemented def test_not_eq(self): @@ -207,7 +207,7 @@ def test_contains(self): assert datetime.date(1922, 5, 1) in century20th # unsupported types result in a type error with pytest.raises(TypeError): - "nineteen-eighty-four" in century20th + assert "nineteen-eighty-four" in century20th # contains check with half-open intervals after_c11th = UndateInterval(Undate(1001), None) From cfdef424b008bbdd73c6eaf70f63f4f53bf726a0 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Sat, 5 Apr 2025 14:32:10 -0400 Subject: [PATCH 66/77] Revise contains logic: interval contains itself or equivalent interval --- src/undate/interval.py | 12 +++++------- tests/test_interval.py | 8 +++++--- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/undate/interval.py b/src/undate/interval.py index 8e6cd2f..96950cf 100644 --- a/src/undate/interval.py +++ b/src/undate/interval.py @@ -125,14 +125,12 @@ def duration(self) -> Timedelta: def __contains__(self, other: object) -> bool: """Determine if another interval or date falls within this - interval.""" - # support comparison with 
another interval + interval. Supports comparison with :class:`UndateInterval` + or anything that can be converted with :meth:`Undate.to_undate`.""" + # support comparison with another interval or anything + # that can be converted to an Undate if isinstance(other, UndateInterval): - # if two intervals are strictly equal, don't consider - # either one as containing the other - if self == other: - return False - # otherwise compare based on earliest/latest bounds + # compare based on earliest/latest bounds other_earliest = other.earliest other_latest = other.latest else: diff --git a/tests/test_interval.py b/tests/test_interval.py index 40713b1..3101b2d 100644 --- a/tests/test_interval.py +++ b/tests/test_interval.py @@ -186,9 +186,9 @@ def test_contains(self): century11th = UndateInterval(Undate(1001), Undate(1100)) century20th = UndateInterval(Undate(1901), Undate(2000)) decade1990s = UndateInterval(Undate(1990), Undate(1999)) - # an interval doesn't contain itself + # an interval DOES contain itself for interval in [century11th, century20th, decade1990s]: - assert interval not in interval + assert interval in interval # checking if an interval is within another interval assert decade1990s in century20th @@ -228,4 +228,6 @@ def test_contains(self): # fully open interval - is this even meaningful? whenever = UndateInterval(None, None) assert decade1990s in whenever - assert whenever not in whenever + # NOTE: an interval contains itself or an equivalent interval, + # but that may not make sense for open intervals... 
+ assert whenever in whenever From fb694b7156432733a21d2a64ac0ef7ade5f79d8c Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 11 Apr 2025 09:30:03 -0400 Subject: [PATCH 67/77] Support conversion from internal Date class to Undate #119 --- src/undate/undate.py | 5 ++++- tests/test_undate.py | 10 +++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/undate/undate.py b/src/undate/undate.py index 1b9671e..d3891ad 100644 --- a/src/undate/undate.py +++ b/src/undate/undate.py @@ -365,7 +365,7 @@ def __contains__(self, other: object) -> bool: @classmethod def to_undate(cls, other: object) -> "Undate": - """Converted arbitrary object to Undate, if possible. Raises TypeError + """Convert arbitrary object to Undate, if possible. Raises TypeError if conversion is not possible. Currently suppports: @@ -377,6 +377,9 @@ def to_undate(cls, other: object) -> "Undate": return other case datetime.date() | datetime.datetime(): return Undate(other.year, other.month, other.day) + case Date(): + # handle conversion from internal Date class + return Undate(other.year, other.month, other.day) case _: raise TypeError(f"Conversion from {type(other)} is not supported") diff --git a/tests/test_undate.py b/tests/test_undate.py index a9087c2..7a140df 100644 --- a/tests/test_undate.py +++ b/tests/test_undate.py @@ -4,7 +4,7 @@ from undate import Undate, UndateInterval, Calendar from undate.converters.base import BaseCalendarConverter -from undate.date import DatePrecision, Timedelta +from undate.date import Date, DatePrecision, Timedelta class TestUndate: @@ -152,6 +152,14 @@ def test_to_undate(self): assert isinstance(undate_from_dt, Undate) assert undate_from_dt == Undate(now.year, now.month, now.day) + # from internal Date object + y2k = Date(2000) + y2k_to_undate = Undate.to_undate(y2k) + assert isinstance(y2k_to_undate, Undate) + assert int(y2k_to_undate.year) == y2k.year + assert y2k_to_undate.month is None + assert y2k_to_undate.day is None + # unsupported type 
with pytest.raises(TypeError): Undate.to_undate("foo") From 1f873bc3f1dd2ff937c25297008a374948eec4a7 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 11 Apr 2025 09:50:56 -0400 Subject: [PATCH 68/77] Use `islamic` for Islamic/Hijri calendar classes #120 --- README.md | 2 +- docs/undate/converters.rst | 12 +-- src/undate/converters/base.py | 6 +- src/undate/converters/calendars/__init__.py | 4 +- .../converters/calendars/hijri/__init__.py | 3 - .../converters/calendars/hijri/parser.py | 9 -- .../converters/calendars/islamic/__init__.py | 3 + .../calendars/{hijri => islamic}/converter.py | 26 +++--- .../hijri.lark => islamic/islamic.lark} | 2 +- .../converters/calendars/islamic/parser.py | 9 ++ .../{hijri => islamic}/transformer.py | 14 +-- src/undate/undate.py | 2 +- tests/test_converters/test_base.py | 6 +- .../test_islamic_converter.py} | 92 +++++++++---------- .../test_islamic_parser.py} | 6 +- .../test_islamic_transformer.py} | 32 +++---- tests/test_undate.py | 8 +- 17 files changed, 118 insertions(+), 118 deletions(-) delete mode 100644 src/undate/converters/calendars/hijri/__init__.py delete mode 100644 src/undate/converters/calendars/hijri/parser.py create mode 100644 src/undate/converters/calendars/islamic/__init__.py rename src/undate/converters/calendars/{hijri => islamic}/converter.py (70%) rename src/undate/converters/calendars/{hijri/hijri.lark => islamic/islamic.lark} (96%) create mode 100644 src/undate/converters/calendars/islamic/parser.py rename src/undate/converters/calendars/{hijri => islamic}/transformer.py (76%) rename tests/test_converters/test_calendars/{test_hijri/test_hijri_converter.py => test_islamic/test_islamic_converter.py} (59%) rename tests/test_converters/test_calendars/{test_hijri/test_hijri_parser.py => test_islamic/test_islamic_parser.py} (90%) rename tests/test_converters/test_calendars/{test_hijri/test_hijri_transformer.py => test_islamic/test_islamic_transformer.py} (59%) diff --git a/README.md b/README.md index 
9c8e898..83f54de 100644 --- a/README.md +++ b/README.md @@ -158,7 +158,7 @@ Currently available converters are "ISO8601" and "EDTF" and supported calendars. ### Calendars -All `Undate` objects are calendar aware, and date converters include support for parsing and working with dates from other calendars. The Gregorian calendar is used by default; currently `undate` supports the Hijri Islamic calendar and the Anno Mundi Hebrew calendar based on calendar convertion logic implemented in the [convertdate](https://convertdate.readthedocs.io/en/latest/)package. +All `Undate` objects are calendar aware, and date converters include support for parsing and working with dates from other calendars. The Gregorian calendar is used by default; currently `undate` supports the Islamic Hijri calendar and the Hebrew Anno Mundi calendar based on calendar conversion logic implemented in the [convertdate](https://convertdate.readthedocs.io/en/latest/)package. Dates are stored with the year, month, day and appropriate precision for the original calendar; internally, earliest and latest dates are calculated in Gregorian / Proleptic Gregorian calendar for standardized comparison across dates from different calendars. diff --git a/docs/undate/converters.rst b/docs/undate/converters.rst index 57e90a1..b93b81e 100644 --- a/docs/undate/converters.rst +++ b/docs/undate/converters.rst @@ -39,15 +39,15 @@ Gregorian .. automodule:: undate.converters.calendars.gregorian :members: -Hijri (Islamic calendar) -^^^^^^^^^^^^^^^^^^^^^^^^ +Hebrew Anno Mundi calendar +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -.. automodule:: undate.converters.calendars.hijri.converter +.. automodule:: undate.converters.calendars.hebrew.converter :members: -Anno Mundi (Hebrew calendar) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Islamic Hijri calendar +^^^^^^^^^^^^^^^^^^^^^^^^ -.. automodule:: undate.converters.calendars.hebrew.converter +.. 
automodule:: undate.converters.calendars.islamic.converter :members: diff --git a/src/undate/converters/base.py b/src/undate/converters/base.py index fe7456b..04db129 100644 --- a/src/undate/converters/base.py +++ b/src/undate/converters/base.py @@ -3,7 +3,7 @@ implementing date converters, which can provide support for parsing and generating dates in different formats. The converter subclass :class:`undate.converters.BaseCalendarConverter` -provides additional functionaly needed for calendar conversion. +provides additional functionality needed for calendar conversion. To add support for a new date converter: @@ -23,10 +23,10 @@ - Create a new file under ``undate/converters/calendars/`` - For converters with sufficient complexity, you may want to create a submodule; - see ``undate.converters.calendars.hijri`` for an example. + see ``undate.converters.calendars.islamic`` for an example. - Extend ``BaseCalendarConverter`` and implement ``parse`` and ``to_string`` formatter methods as desired/appropriate for your converter as well as the - additional methods for ``max_month``, ``max_day``, and convertion ``to_gregorian`` + additional methods for ``max_month``, ``max_day``, and conversion ``to_gregorian`` calendar. 
- Import your calendar in ``undate/converters/calendars/__init__.py`` and include in `__all__`` - Add unit tests for the new calendar logic under ``tests/test_converters/calendars/`` diff --git a/src/undate/converters/calendars/__init__.py b/src/undate/converters/calendars/__init__.py index c14e115..a43a270 100644 --- a/src/undate/converters/calendars/__init__.py +++ b/src/undate/converters/calendars/__init__.py @@ -1,5 +1,5 @@ from undate.converters.calendars.gregorian import GregorianDateConverter -from undate.converters.calendars.hijri import HijriDateConverter from undate.converters.calendars.hebrew import HebrewDateConverter +from undate.converters.calendars.islamic import IslamicDateConverter -__all__ = ["HijriDateConverter", "GregorianDateConverter", "HebrewDateConverter"] +__all__ = ["GregorianDateConverter", "HebrewDateConverter", "IslamicDateConverter"] diff --git a/src/undate/converters/calendars/hijri/__init__.py b/src/undate/converters/calendars/hijri/__init__.py deleted file mode 100644 index 4ac5b4b..0000000 --- a/src/undate/converters/calendars/hijri/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from undate.converters.calendars.hijri.converter import HijriDateConverter - -__all__ = ["HijriDateConverter"] diff --git a/src/undate/converters/calendars/hijri/parser.py b/src/undate/converters/calendars/hijri/parser.py deleted file mode 100644 index 273cdf9..0000000 --- a/src/undate/converters/calendars/hijri/parser.py +++ /dev/null @@ -1,9 +0,0 @@ -import pathlib - -from lark import Lark - -grammar_path = pathlib.Path(__file__).parent / "hijri.lark" - -with open(grammar_path) as grammar: - # NOTE: LALR parser is faster but can't be used to ambiguity between years and dates - hijri_parser = Lark(grammar.read(), start="hijri_date", strict=True) diff --git a/src/undate/converters/calendars/islamic/__init__.py b/src/undate/converters/calendars/islamic/__init__.py new file mode 100644 index 0000000..ffbb9d2 --- /dev/null +++ 
b/src/undate/converters/calendars/islamic/__init__.py @@ -0,0 +1,3 @@ +from undate.converters.calendars.islamic.converter import IslamicDateConverter + +__all__ = ["IslamicDateConverter"] diff --git a/src/undate/converters/calendars/hijri/converter.py b/src/undate/converters/calendars/islamic/converter.py similarity index 70% rename from src/undate/converters/calendars/hijri/converter.py rename to src/undate/converters/calendars/islamic/converter.py index 12a04d8..fb5e870 100644 --- a/src/undate/converters/calendars/hijri/converter.py +++ b/src/undate/converters/calendars/islamic/converter.py @@ -5,24 +5,24 @@ from undate import Undate, UndateInterval from undate.converters.base import BaseCalendarConverter -from undate.converters.calendars.hijri.parser import hijri_parser -from undate.converters.calendars.hijri.transformer import HijriDateTransformer +from undate.converters.calendars.islamic.parser import islamic_parser +from undate.converters.calendars.islamic.transformer import IslamicDateTransformer -class HijriDateConverter(BaseCalendarConverter): +class IslamicDateConverter(BaseCalendarConverter): """ - Converter for Hijri / Islamic calendar. + Converter for Islamic Hijri calendar. - Support for parsing Hijri dates and converting to Undate and UndateInterval + Support for parsing Islamic Hijri dates and converting to Undate and UndateInterval objects in the Gregorian calendar. 
""" - #: converter name: Hijri - name: str = "Hijri" - calendar_name: str = "Hijrī" + #: converter name: Islamic + name: str = "Islamic" + calendar_name: str = "Islamic" def __init__(self): - self.transformer = HijriDateTransformer() + self.transformer = IslamicDateTransformer() def max_day(self, year: int, month: int) -> int: """maximum numeric day for the specified year and month in this calendar""" @@ -44,9 +44,9 @@ def to_gregorian(self, year: int, month: int, day: int) -> tuple[int, int, int]: def parse(self, value: str) -> Union[Undate, UndateInterval]: """ - Parse a Hijri date string and return an :class:`~undate.undate.Undate` or + Parse an Islamic/Hijri date string and return an :class:`~undate.undate.Undate` or :class:`~undate.undate.UndateInterval`. - The Hijri date string is preserved in the undate label. + The Islamic/Hijri date string is preserved in the undate label. """ if not value: raise ValueError("Parsing empty string is not supported") @@ -54,14 +54,14 @@ def parse(self, value: str) -> Union[Undate, UndateInterval]: # parse the input string, then transform to undate object try: # parse the string with our Hijri date parser - parsetree = hijri_parser.parse(value) + parsetree = islamic_parser.parse(value) # transform the parse tree into an undate or undate interval undate_obj = self.transformer.transform(parsetree) # set the original date as a label, with the calendar name undate_obj.label = f"{value} {self.calendar_name}" return undate_obj except UnexpectedCharacters as err: - raise ValueError(f"Could not parse '{value}' as a Hijri date") from err + raise ValueError(f"Could not parse '{value}' as an Islamic date") from err # do we need to support conversion the other direction? # i.e., generate a Hijri date from an abitrary undate or undate interval? 
diff --git a/src/undate/converters/calendars/hijri/hijri.lark b/src/undate/converters/calendars/islamic/islamic.lark similarity index 96% rename from src/undate/converters/calendars/hijri/hijri.lark rename to src/undate/converters/calendars/islamic/islamic.lark index 4e6ccc7..3ad59a5 100644 --- a/src/undate/converters/calendars/hijri/hijri.lark +++ b/src/undate/converters/calendars/islamic/islamic.lark @@ -3,7 +3,7 @@ // only support day month year format for now // parser requires numeric day and year to be distinguished based on order -hijri_date: day month year | month year | year +islamic_date: day month year | month year | year // TODO: handle date ranges? diff --git a/src/undate/converters/calendars/islamic/parser.py b/src/undate/converters/calendars/islamic/parser.py new file mode 100644 index 0000000..b103711 --- /dev/null +++ b/src/undate/converters/calendars/islamic/parser.py @@ -0,0 +1,9 @@ +import pathlib + +from lark import Lark + +grammar_path = pathlib.Path(__file__).parent / "islamic.lark" + +with open(grammar_path) as grammar: + # NOTE: LALR parser is faster but can't be used due to ambiguity between years and days + islamic_parser = Lark(grammar.read(), start="islamic_date", strict=True) diff --git a/src/undate/converters/calendars/hijri/transformer.py b/src/undate/converters/calendars/islamic/transformer.py similarity index 76% rename from src/undate/converters/calendars/hijri/transformer.py rename to src/undate/converters/calendars/islamic/transformer.py index 8b78b2c..9ffce36 100644 --- a/src/undate/converters/calendars/hijri/transformer.py +++ b/src/undate/converters/calendars/islamic/transformer.py @@ -3,17 +3,17 @@ from undate import Undate, Calendar -class HijriUndate(Undate): - """Undate convience subclass; sets default calendar to Hijri.""" +class IslamicUndate(Undate): + """Undate convience subclass; sets default calendar to Islamic.""" - calendar = Calendar.HIJRI + calendar = Calendar.ISLAMIC -class HijriDateTransformer(Transformer): - 
"""Transform a Hijri date parse tree and return an Undate or +class IslamicDateTransformer(Transformer): + """Transform an Islamic Hijri date parse tree and return an Undate or UndateInterval.""" - def hijri_date(self, items): + def islamic_date(self, items): parts = {} for child in items: if child.data in ["year", "month", "day"]: @@ -24,7 +24,7 @@ def hijri_date(self, items): # initialize and return an undate with islamic year, month, day and # islamic calendar - return HijriUndate(**parts) + return IslamicUndate(**parts) # year translation is not needed since we want a tree with name year # this is equivalent to a no-op diff --git a/src/undate/undate.py b/src/undate/undate.py index 1b9671e..6647ad6 100644 --- a/src/undate/undate.py +++ b/src/undate/undate.py @@ -27,8 +27,8 @@ class Calendar(StrEnum): """Supported calendars""" GREGORIAN = auto() - HIJRI = auto() HEBREW = auto() + ISLAMIC = auto() @staticmethod def get_converter(calendar): diff --git a/tests/test_converters/test_base.py b/tests/test_converters/test_base.py index 6420ec7..a4ac52d 100644 --- a/tests/test_converters/test_base.py +++ b/tests/test_converters/test_base.py @@ -5,7 +5,7 @@ from undate.converters.calendars import ( GregorianDateConverter, HebrewDateConverter, - HijriDateConverter, + IslamicDateConverter, ) @@ -36,12 +36,12 @@ def test_parse_to_string(self): def test_subclasses(self): # define a nested subclass - class SubSubConverter(HijriDateConverter): + class SubSubConverter(IslamicDateConverter): pass subclasses = BaseDateConverter.subclasses() assert BaseCalendarConverter not in subclasses - assert HijriDateConverter in subclasses + assert IslamicDateConverter in subclasses assert HebrewDateConverter in subclasses assert GregorianDateConverter in subclasses assert SubSubConverter in subclasses diff --git a/tests/test_converters/test_calendars/test_hijri/test_hijri_converter.py b/tests/test_converters/test_calendars/test_islamic/test_islamic_converter.py similarity index 59% rename 
from tests/test_converters/test_calendars/test_hijri/test_hijri_converter.py rename to tests/test_converters/test_calendars/test_islamic/test_islamic_converter.py index 6541586..4acacd0 100644 --- a/tests/test_converters/test_calendars/test_hijri/test_hijri_converter.py +++ b/tests/test_converters/test_calendars/test_islamic/test_islamic_converter.py @@ -1,40 +1,40 @@ import pytest -from undate.converters.calendars import HijriDateConverter -from undate.converters.calendars.hijri.transformer import HijriUndate +from undate.converters.calendars import IslamicDateConverter +from undate.converters.calendars.islamic.transformer import IslamicUndate from undate.undate import Calendar, Undate from undate.date import DatePrecision, Date -class TestHijriDateConverter: +class TestIslamicDateConverter: def test_parse(self): # day # Monday, 7 Jumādā I 1243 Hijrī (26 November, 1827 CE); Jumada I = month 5 date_str = "7 Jumādā I 1243" - date = HijriDateConverter().parse(date_str) - assert date == HijriUndate(1243, 5, 7) - assert date.calendar == Calendar.HIJRI + date = IslamicDateConverter().parse(date_str) + assert date == IslamicUndate(1243, 5, 7) + assert date.calendar == Calendar.ISLAMIC assert date.precision == DatePrecision.DAY - assert date.label == f"{date_str} {HijriDateConverter.calendar_name}" + assert date.label == f"{date_str} {IslamicDateConverter.calendar_name}" # month date_str = "Rajab 495" - date = HijriDateConverter().parse(date_str) - assert date == HijriUndate(495, 7) # Rajab is month 7 - assert date.calendar == Calendar.HIJRI + date = IslamicDateConverter().parse(date_str) + assert date == IslamicUndate(495, 7) # Rajab is month 7 + assert date.calendar == Calendar.ISLAMIC assert date.precision == DatePrecision.MONTH - assert date.label == f"{date_str} {HijriDateConverter.calendar_name}" + assert date.label == f"{date_str} {IslamicDateConverter.calendar_name}" # Gregorian earliest/ latest assert date.earliest == Date(1102, 4, 28) assert date.latest == 
Date(1102, 5, 27) # year date_str = "441" - date = HijriDateConverter().parse(date_str) - assert date == HijriUndate(441) - assert date.calendar == Calendar.HIJRI + date = IslamicDateConverter().parse(date_str) + assert date == IslamicUndate(441) + assert date.calendar == Calendar.ISLAMIC assert date.precision == DatePrecision.YEAR - assert date.label == f"{date_str} {HijriDateConverter.calendar_name}" + assert date.label == f"{date_str} {IslamicDateConverter.calendar_name}" # Gregorian earliest/ latest assert date.earliest == Date(1049, 6, 11) assert date.latest == Date(1050, 5, 31) @@ -43,108 +43,108 @@ def test_gregorian_earliest_latest(self): # earliest/latest should be converted to Gregorian for comparison # Monday, 7 Jumādā I 1243 Hijrī (26 November, 1827 CE); Jumada I = month 5 - date = HijriUndate(1243, 5, 7) + date = IslamicUndate(1243, 5, 7) assert date.earliest == Date(1827, 11, 26) assert date.latest == Date(1827, 11, 26) # Jumādā I 1243 : 1827-11-20 to 1827-12-19 - date = HijriUndate(1243, 5) + date = IslamicUndate(1243, 5) assert date.earliest == Date(1827, 11, 20) assert date.latest == Date(1827, 12, 19) # Rajab 495: 1102-04-28 to 1102-05-27 (Rajab = month 7) - date = HijriUndate(495, 7) + date = IslamicUndate(495, 7) assert date.earliest == Date(1102, 4, 28) assert date.latest == Date(1102, 5, 27) # 441 : 1049-06-11 to 1050-05-31 - date = HijriUndate(441) + date = IslamicUndate(441) assert date.earliest == Date(1049, 6, 11) assert date.latest == Date(1050, 5, 31) # examples from ISMI data (reformatted to day month year) # 14 Rabīʿ I 901 : 1495-12-11 (Rabi 1 = month 3 ) - date = HijriUndate(901, 3, 14) + date = IslamicUndate(901, 3, 14) assert date.earliest == Date(1495, 12, 11) assert date.latest == Date(1495, 12, 11) # 884 : 1479-04-03 to 1480-03-21 - date = HijriUndate(884) + date = IslamicUndate(884) assert date.earliest == Date(1479, 4, 3) assert date.latest == Date(1480, 3, 21) def test_parse_error(self): # a string we can't parse should raise 
an error with pytest.raises(ValueError): - HijriDateConverter().parse("January 2, 1991") + IslamicDateConverter().parse("January 2, 1991") # empty string should also error with pytest.raises(ValueError): - HijriDateConverter().parse("") + IslamicDateConverter().parse("") def test_partially_known(self): # hijri dates get existing partially unknown behavior - unknown_month = HijriUndate(1243, "XX") + unknown_month = IslamicUndate(1243, "XX") assert unknown_month.precision == DatePrecision.MONTH assert unknown_month.earliest == Date( - *HijriDateConverter().to_gregorian(1243, 1, 1) + *IslamicDateConverter().to_gregorian(1243, 1, 1) ) assert unknown_month.latest == Date( - *HijriDateConverter().to_gregorian(1243, 12, 30) + *IslamicDateConverter().to_gregorian(1243, 12, 30) ) - partially_unknown_month = HijriUndate(1243, "1X") + partially_unknown_month = IslamicUndate(1243, "1X") assert partially_unknown_month.precision == DatePrecision.MONTH assert partially_unknown_month.earliest == Date( - *HijriDateConverter().to_gregorian(1243, 10, 1) + *IslamicDateConverter().to_gregorian(1243, 10, 1) ) assert partially_unknown_month.latest == Date( - *HijriDateConverter().to_gregorian(1243, 12, 30) + *IslamicDateConverter().to_gregorian(1243, 12, 30) ) - unknown_day = HijriUndate(1243, 2, "XX") + unknown_day = IslamicUndate(1243, 2, "XX") assert unknown_day.precision == DatePrecision.DAY assert unknown_day.earliest == Date( - *HijriDateConverter().to_gregorian(1243, 2, 1) + *IslamicDateConverter().to_gregorian(1243, 2, 1) ) # second month has 29 days assert unknown_day.latest == Date( - *HijriDateConverter().to_gregorian(1243, 2, 29) + *IslamicDateConverter().to_gregorian(1243, 2, 29) ) - partially_unknown_day = HijriUndate(1243, 2, "2X") + partially_unknown_day = IslamicUndate(1243, 2, "2X") assert partially_unknown_day.precision == DatePrecision.DAY assert partially_unknown_day.earliest == Date( - *HijriDateConverter().to_gregorian(1243, 2, 20) + 
*IslamicDateConverter().to_gregorian(1243, 2, 20) ) assert partially_unknown_day.latest == Date( - *HijriDateConverter().to_gregorian(1243, 2, 29) + *IslamicDateConverter().to_gregorian(1243, 2, 29) ) def test_compare_across_calendars(self): # only day-precision dates can be exactly equal across calendars # 7 Jumādā I 1243 Hijrī : 26 November, 1827; Jumada I = month 5 - assert HijriUndate(1243, 5, 7) == Undate(1827, 11, 26) + assert IslamicUndate(1243, 5, 7) == Undate(1827, 11, 26) # 14 Rabīʿ I 901 : 1495-12-11 (Rabi 1 = month 3 ) - assert HijriUndate(901, 3, 14) == Undate(1495, 12, 11) + assert IslamicUndate(901, 3, 14) == Undate(1495, 12, 11) # greater than / less than - assert HijriUndate(901) < Undate(1500) - assert HijriUndate(901) > Undate(1450) + assert IslamicUndate(901) < Undate(1500) + assert IslamicUndate(901) > Undate(1450) # Jumādā I 1243 : 1827-11-20 to 1827-12-19 - assert HijriUndate(1243, 5) > Undate(1827, 10) - assert HijriUndate(1243, 5) < Undate(1828, 1) + assert IslamicUndate(1243, 5) > Undate(1827, 10) + assert IslamicUndate(1243, 5) < Undate(1828, 1) # 7 Jumādā I 1243 Hijrī : 26 November, 1827, so it falls # within (or is contained by) November 1827 - assert HijriUndate(1243, 5, 7) in Undate(1827, 11) - assert HijriUndate(1243, 5, 7) not in Undate(1827, 10) + assert IslamicUndate(1243, 5, 7) in Undate(1827, 11) + assert IslamicUndate(1243, 5, 7) not in Undate(1827, 10) # sorting sorted_dates = sorted( [ - HijriUndate(884), # 1479 to 1480 Gregorian - HijriUndate(441), # 1049 to 1050 Gregorian - HijriUndate(901), # 1495 to 1495 Gregorian + IslamicUndate(884), # 1479 to 1480 Gregorian + IslamicUndate(441), # 1049 to 1050 Gregorian + IslamicUndate(901), # 1495 to 1495 Gregorian Undate(1995), Undate(33), Undate(1350), diff --git a/tests/test_converters/test_calendars/test_hijri/test_hijri_parser.py b/tests/test_converters/test_calendars/test_islamic/test_islamic_parser.py similarity index 90% rename from 
tests/test_converters/test_calendars/test_hijri/test_hijri_parser.py rename to tests/test_converters/test_calendars/test_islamic/test_islamic_parser.py index 6b9c828..92ca94c 100644 --- a/tests/test_converters/test_calendars/test_hijri/test_hijri_parser.py +++ b/tests/test_converters/test_calendars/test_islamic/test_islamic_parser.py @@ -1,5 +1,5 @@ import pytest -from undate.converters.calendars.hijri.parser import hijri_parser +from undate.converters.calendars.islamic.parser import islamic_parser # for now, just test that valid dates can be parsed @@ -49,7 +49,7 @@ @pytest.mark.parametrize("date_string", testcases) def test_should_parse(date_string): - assert hijri_parser.parse(date_string) + assert islamic_parser.parse(date_string) error_cases = [ @@ -73,4 +73,4 @@ def test_should_parse(date_string): @pytest.mark.parametrize("date_string", error_cases) def test_should_error(date_string): with pytest.raises(Exception): - hijri_parser.parse(date_string) + islamic_parser.parse(date_string) diff --git a/tests/test_converters/test_calendars/test_hijri/test_hijri_transformer.py b/tests/test_converters/test_calendars/test_islamic/test_islamic_transformer.py similarity index 59% rename from tests/test_converters/test_calendars/test_hijri/test_hijri_transformer.py rename to tests/test_converters/test_calendars/test_islamic/test_islamic_transformer.py index 7ebc117..951a9f8 100644 --- a/tests/test_converters/test_calendars/test_hijri/test_hijri_transformer.py +++ b/tests/test_converters/test_calendars/test_islamic/test_islamic_transformer.py @@ -1,34 +1,34 @@ import pytest -from undate.converters.calendars.hijri.parser import hijri_parser -from undate.converters.calendars.hijri.transformer import ( - HijriDateTransformer, - HijriUndate, +from undate.converters.calendars.islamic.parser import islamic_parser +from undate.converters.calendars.islamic.transformer import ( + IslamicDateTransformer, + IslamicUndate, ) from undate.undate import Undate, Calendar from undate.date 
import DatePrecision -def test_hijri_undate(): - assert HijriUndate(848).calendar == Calendar.HIJRI +def test_islamic_undate(): + assert IslamicUndate(848).calendar == Calendar.ISLAMIC testcases = [ # examples from Princeton Geniza Project # date conversions checked with https://www.muqawwim.com/ # Monday, 7 Jumādā I 1243 Hijrī (26 November, 1827 CE); Jumada I = month 5 - ("7 Jumādā I 1243", HijriUndate(1243, 5, 7), DatePrecision.DAY), - ("Jumādā I 1243", HijriUndate(1243, 5), DatePrecision.MONTH), - ("1243", HijriUndate(1243), DatePrecision.YEAR), + ("7 Jumādā I 1243", IslamicUndate(1243, 5, 7), DatePrecision.DAY), + ("Jumādā I 1243", IslamicUndate(1243, 5), DatePrecision.MONTH), + ("1243", IslamicUndate(1243), DatePrecision.YEAR), # Gregorian: UndateInterval(Undate(1827, 7, 25), Undate(1828, 7, 13)), # Zū al-Qaʿdah / Dhu l-Qa'da = month 11 - ("27 Dhū l-Qaʿda 632", HijriUndate(632, 11, 27), DatePrecision.DAY), + ("27 Dhū l-Qaʿda 632", IslamicUndate(632, 11, 27), DatePrecision.DAY), # Rajab = month 7 - ("Rajab 495", HijriUndate(495, 7), DatePrecision.MONTH), - ("441", HijriUndate(441), DatePrecision.YEAR), + ("Rajab 495", IslamicUndate(495, 7), DatePrecision.MONTH), + ("441", IslamicUndate(441), DatePrecision.YEAR), # examples from ISMI data (reformatted to day month year) # Rabi 1 = month 3 - ("14 Rabīʿ I 901", HijriUndate(901, 3, 14), DatePrecision.DAY), - ("884", HijriUndate(884), DatePrecision.YEAR), + ("14 Rabīʿ I 901", IslamicUndate(901, 3, 14), DatePrecision.DAY), + ("884", IslamicUndate(884), DatePrecision.YEAR), # Gregorian: UndateInterval(Undate(1479, 4, 3), Undate(1480, 3, 21)), # add when we support parsing ranges: # 900 Muḥarram 1 - 999 Ḏu al-Ḥijjaẗ 29 : 1494-10-11 to 1591-10-18 @@ -37,9 +37,9 @@ def test_hijri_undate(): @pytest.mark.parametrize("date_string,expected,expected_precision", testcases) def test_transform(date_string, expected, expected_precision): - transformer = HijriDateTransformer(visit_tokens=True) + transformer = 
IslamicDateTransformer(visit_tokens=True) # parse the input string, then transform to undate object - parsetree = hijri_parser.parse(date_string) + parsetree = islamic_parser.parse(date_string) transformed_date = transformer.transform(parsetree) assert transformed_date == expected # currently only undates have date precision diff --git a/tests/test_undate.py b/tests/test_undate.py index a9087c2..dc9c1f4 100644 --- a/tests/test_undate.py +++ b/tests/test_undate.py @@ -31,7 +31,7 @@ def test_repr(self): repr(Undate(2022, 11, 7, label="A Special Day")) == "" ) - assert repr(Undate(484, calendar=Calendar.HIJRI)) == "" + assert repr(Undate(484, calendar=Calendar.ISLAMIC)) == "" def test_init_str(self): assert Undate("2000").earliest.year == 2000 @@ -122,10 +122,10 @@ def test_init_partially_known_day(self): def test_calendar(self): assert Undate(2024).calendar == Calendar.GREGORIAN # by name, any case - assert Undate(848, calendar="HIJRI").calendar == Calendar.HIJRI - assert Undate(848, calendar="hijri").calendar == Calendar.HIJRI + assert Undate(848, calendar="ISLAMIC").calendar == Calendar.ISLAMIC + assert Undate(848, calendar="islamic").calendar == Calendar.ISLAMIC # by enum - assert Undate(848, calendar=Calendar.HIJRI).calendar == Calendar.HIJRI + assert Undate(848, calendar=Calendar.ISLAMIC).calendar == Calendar.ISLAMIC # invalid with pytest.raises(ValueError, match="Calendar `foobar` is not supported"): Undate(848, calendar="foobar") From 48b0f626c65382af233e1d123827d36a1bf9cfba Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 11 Apr 2025 10:13:32 -0400 Subject: [PATCH 69/77] Reorganize examples and update readme / contents #121 --- examples/README.md | 16 ++-- examples/{notebooks => }/edtf-support.ipynb | 24 ++--- examples/{use-cases => }/ismi/README.md | 0 .../ismi/data/ismi-crm-date-samples.ttl | 0 examples/notebooks/README.md | 6 -- .../shakespeare-and-company-project/README.md | 0 .../SCoData_events_v1.2_2022-01.csv | 0 
.../shxco_partial_date_durations.ipynb | 90 +++++++++---------- 8 files changed, 68 insertions(+), 68 deletions(-) rename examples/{notebooks => }/edtf-support.ipynb (99%) rename examples/{use-cases => }/ismi/README.md (100%) rename examples/{use-cases => }/ismi/data/ismi-crm-date-samples.ttl (100%) delete mode 100644 examples/notebooks/README.md rename examples/{use-cases => }/shakespeare-and-company-project/README.md (100%) rename examples/{use-cases => }/shakespeare-and-company-project/SCoData_events_v1.2_2022-01.csv (100%) rename examples/{notebooks => shakespeare-and-company-project}/shxco_partial_date_durations.ipynb (99%) diff --git a/examples/README.md b/examples/README.md index bf2a99c..127d984 100644 --- a/examples/README.md +++ b/examples/README.md @@ -1,7 +1,13 @@ # undate examples -Example data and projects with use cases for uncertain date logic and -example code notebooks using undate. - -- [use cases](use-cases) - examples from projects or specific data with use cases for undate -- [notebooks](notebooks) - code notebooks showing how undate can be used on a specific dataset or for a specific problem +This folder contains code notebooks demonstrating how `undate` can be +used on a specific dataset, problem, or format, as well as example +data from projects with use cases for uncertain date logic. 
+ +## Contents + +- [EDTF support](edtf-support.ipynb) - demonstrate and validate supported portions of the Extended Date Time Format (EDTF) specification (jupyter notebook) +- [ISMI](ismi) - Sample data from the Islamic Scientific Manuscript Initiative project +- [Shakespeare and Company Project](shakespeare-and-company-project) + - Events data from version 1.2 of Shakespeare and Company Project datasets + - [Partial date duration logic](shxco_partial_date_durations.ipynb) - compare `undate` partial date range duration logic with a previous implementation in the _Shakespeare and Company Project_ (jupyter notebook) diff --git a/examples/notebooks/edtf-support.ipynb b/examples/edtf-support.ipynb similarity index 99% rename from examples/notebooks/edtf-support.ipynb rename to examples/edtf-support.ipynb index 0295647..a604838 100644 --- a/examples/notebooks/edtf-support.ipynb +++ b/examples/edtf-support.ipynb @@ -55,7 +55,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "id": "9c6b7379-b2a7-4ec1-afa5-2cd9832c8a5d", "metadata": {}, "outputs": [], @@ -92,7 +92,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "id": "923476ff-344a-4018-a02e-6e5f80ea76a8", "metadata": {}, "outputs": [], @@ -159,7 +159,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "id": "6ed422de-34a2-4324-b254-f62db00563f7", "metadata": {}, "outputs": [], @@ -212,7 +212,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "id": "8d98a139-627b-40bd-b1c5-d0028e538a53", "metadata": {}, "outputs": [], @@ -255,7 +255,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "id": "532470db-851e-4f91-9242-cd93d35054cf", "metadata": {}, "outputs": [], @@ -320,7 +320,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "id": "a5abd0e4-0b26-49b0-bf78-3e1fe6c046d8", "metadata": {}, "outputs": [], @@ -425,7 +425,7 @@ }, { "cell_type": "code", - 
"execution_count": 8, + "execution_count": 7, "id": "e47f3fff-d35c-4c2e-9568-214763f6511a", "metadata": {}, "outputs": [], @@ -481,7 +481,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "id": "39143c1f-932a-450c-9b2d-ffbe3e1416b0", "metadata": {}, "outputs": [], @@ -535,7 +535,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "id": "95965f17-0bd5-446f-bc09-9503eaed68e2", "metadata": {}, "outputs": [], @@ -589,7 +589,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "id": "c6c2d1a1-39f1-45eb-ac08-1de4fadbe842", "metadata": {}, "outputs": [], @@ -640,7 +640,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 11, "id": "f24fd31a-176a-40b5-bff4-d72b68f32a18", "metadata": {}, "outputs": [], @@ -688,7 +688,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 12, "id": "5910caab-eada-4715-b863-9bbbb15b9c5c", "metadata": {}, "outputs": [], diff --git a/examples/use-cases/ismi/README.md b/examples/ismi/README.md similarity index 100% rename from examples/use-cases/ismi/README.md rename to examples/ismi/README.md diff --git a/examples/use-cases/ismi/data/ismi-crm-date-samples.ttl b/examples/ismi/data/ismi-crm-date-samples.ttl similarity index 100% rename from examples/use-cases/ismi/data/ismi-crm-date-samples.ttl rename to examples/ismi/data/ismi-crm-date-samples.ttl diff --git a/examples/notebooks/README.md b/examples/notebooks/README.md deleted file mode 100644 index 17c1270..0000000 --- a/examples/notebooks/README.md +++ /dev/null @@ -1,6 +0,0 @@ -# `undate` demo notebooks - -This folder contains code notebooks demonstrating how undate can be used on a specific dataset or for a specific problem. 
- -- [EDTF support](edtf-support.ipynb) - demonstrate and validate supported portions of the Extended Date Time Format (EDTF) specification -- [Partial date duration logic](shxco_partial_date_durations.ipynb) - compare `undate` partial date range duration logic with a previous implementation in the _Shakespeare and Company Project_ diff --git a/examples/use-cases/shakespeare-and-company-project/README.md b/examples/shakespeare-and-company-project/README.md similarity index 100% rename from examples/use-cases/shakespeare-and-company-project/README.md rename to examples/shakespeare-and-company-project/README.md diff --git a/examples/use-cases/shakespeare-and-company-project/SCoData_events_v1.2_2022-01.csv b/examples/shakespeare-and-company-project/SCoData_events_v1.2_2022-01.csv similarity index 100% rename from examples/use-cases/shakespeare-and-company-project/SCoData_events_v1.2_2022-01.csv rename to examples/shakespeare-and-company-project/SCoData_events_v1.2_2022-01.csv diff --git a/examples/notebooks/shxco_partial_date_durations.ipynb b/examples/shakespeare-and-company-project/shxco_partial_date_durations.ipynb similarity index 99% rename from examples/notebooks/shxco_partial_date_durations.ipynb rename to examples/shakespeare-and-company-project/shxco_partial_date_durations.ipynb index 486981a..38efa6c 100644 --- a/examples/notebooks/shxco_partial_date_durations.ipynb +++ b/examples/shakespeare-and-company-project/shxco_partial_date_durations.ipynb @@ -21,7 +21,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [ { @@ -29,7 +29,7 @@ "output_type": "stream", "text": [ "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is 
available: \u001b[0m\u001b[31;49m24.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.0.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49m/Users/rkoeser/workarea/env/undate/bin/python3 -m pip install --upgrade pip\u001b[0m\n", "Note: you may need to restart the kernel to use updated packages.\n" ] @@ -41,7 +41,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -288,7 +288,7 @@ "[5 rows x 28 columns]" ] }, - "execution_count": 3, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -296,8 +296,8 @@ "source": [ "import pandas as pd\n", "\n", - "# load the 1.2 version of S&co events dataset; we have a copy in our use-cases folder\n", - "events_df = pd.read_csv(\"../use-cases/shakespeare-and-company-project/SCoData_events_v1.2_2022-01.csv\", low_memory=False)\n", + "# load the 1.2 version of S&co events dataset; expected to be in the same folder as this notebook\n", + "events_df = pd.read_csv(\"./SCoData_events_v1.2_2022-01.csv\", low_memory=False)\n", "events_df.head()" ] }, @@ -316,7 +316,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 3, "metadata": { "id": "y_MqgrQW64uI" }, @@ -353,7 +353,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -452,7 +452,7 @@ "260 4 months 122.0 " ] }, - "execution_count": 5, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -478,7 +478,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -505,7 +505,7 @@ "Name: count, Length: 133, dtype: int64" ] }, - "execution_count": 6, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -517,7 +517,7 @@ }, { "cell_type": "code",
- "execution_count": 7, + "execution_count": 6, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -540,7 +540,7 @@ "Name: subscription_duration_days, dtype: float64" ] }, - "execution_count": 7, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -558,7 +558,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -607,7 +607,7 @@ "Index: []" ] }, - "execution_count": 8, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -619,7 +619,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -688,7 +688,7 @@ "13686 NaN 31.0 " ] }, - "execution_count": 9, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -707,7 +707,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "metadata": { "id": "jwvN9-CgLQRx" }, @@ -727,7 +727,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -832,7 +832,7 @@ "260 4 months 122.0 152 days " ] }, - "execution_count": 11, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -845,7 +845,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 11, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -950,7 +950,7 @@ "260 4 months 122.0 152 days " ] }, - "execution_count": 12, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -962,7 +962,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 12, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1161,7 +1161,7 @@ "[9144 rows x 7 columns]" ] }, - "execution_count": 13, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -1174,7 +1174,7 @@ }, { "cell_type": 
"code", - "execution_count": 14, + "execution_count": 13, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1200,7 +1200,7 @@ "Name: count, dtype: int64" ] }, - "execution_count": 14, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -1220,7 +1220,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 14, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1403,7 +1403,7 @@ "313 30.0 " ] }, - "execution_count": 15, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -1416,7 +1416,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 15, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1443,7 +1443,7 @@ "Name: count, dtype: int64" ] }, - "execution_count": 16, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -1455,7 +1455,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 16, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1686,7 +1686,7 @@ "472 30.0 60 days 30.0 " ] }, - "execution_count": 17, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -1709,7 +1709,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 17, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1957,7 +1957,7 @@ "415 29.0 " ] }, - "execution_count": 18, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -1982,7 +1982,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 18, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -2068,7 +2068,7 @@ "606 G. E. 
Pulsford --01-20 --01-28 8.0" ] }, - "execution_count": 19, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -2082,7 +2082,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 19, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -2168,7 +2168,7 @@ "29908 Ann Samyn 1961-10-04 1962-03-21 168.0" ] }, - "execution_count": 20, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -2179,7 +2179,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 20, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -2316,7 +2316,7 @@ "611 Gertrude Stein --01-24 --05-30 126.0 126 days" ] }, - "execution_count": 21, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -2329,7 +2329,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 21, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -2489,7 +2489,7 @@ "611 0.0 " ] }, - "execution_count": 22, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -2502,7 +2502,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 22, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -2519,7 +2519,7 @@ "Name: count, dtype: int64" ] }, - "execution_count": 23, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -2551,7 +2551,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 23, "metadata": { "colab": { "base_uri": "https://localhost:8080/", From f4427240fcb0cf4836e02d94079838cfca674fdf Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 11 Apr 2025 10:16:44 -0400 Subject: [PATCH 70/77] Clean up additional mentions & typos flagged by coderabbitai --- README.md | 2 +- src/undate/converters/calendars/islamic/converter.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 83f54de..23ee260 100644 --- 
a/README.md +++ b/README.md @@ -158,7 +158,7 @@ Currently available converters are "ISO8601" and "EDTF" and supported calendars. ### Calendars -All `Undate` objects are calendar aware, and date converters include support for parsing and working with dates from other calendars. The Gregorian calendar is used by default; currently `undate` supports the Islamic Hijri calendar and the Hebrew Anno Mundi calendar based on calendar conversion logic implemented in the [convertdate](https://convertdate.readthedocs.io/en/latest/)package. +All `Undate` objects are calendar aware, and date converters include support for parsing and working with dates from other calendars. The Gregorian calendar is used by default; currently `undate` supports the Islamic Hijri calendar and the Hebrew Anno Mundi calendar based on calendar conversion logic implemented in the [convertdate](https://convertdate.readthedocs.io/en/latest/) package. Dates are stored with the year, month, day and appropriate precision for the original calendar; internally, earliest and latest dates are calculated in Gregorian / Proleptic Gregorian calendar for standardized comparison across dates from different calendars. 
diff --git a/src/undate/converters/calendars/islamic/converter.py b/src/undate/converters/calendars/islamic/converter.py index fb5e870..c658c90 100644 --- a/src/undate/converters/calendars/islamic/converter.py +++ b/src/undate/converters/calendars/islamic/converter.py @@ -53,7 +53,7 @@ def parse(self, value: str) -> Union[Undate, UndateInterval]: # parse the input string, then transform to undate object try: - # parse the string with our Hijri date parser + # parse the string with our Islamic Hijri date parser parsetree = islamic_parser.parse(value) # transform the parse tree into an undate or undate interval undate_obj = self.transformer.transform(parsetree) @@ -64,4 +64,4 @@ def parse(self, value: str) -> Union[Undate, UndateInterval]: raise ValueError(f"Could not parse '{value}' as an Islamic date") from err # do we need to support conversion the other direction? - # i.e., generate a Hijri date from an abitrary undate or undate interval? + # i.e., generate an Islamic Hijri date from an arbitrary undate or undate interval? From 6d369289eefa51b168ce55b436c96c9d3e248113 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 11 Apr 2025 10:21:49 -0400 Subject: [PATCH 71/77] Fix shxco notebook path and augment description --- examples/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/README.md b/examples/README.md index 127d984..080a6ce 100644 --- a/examples/README.md +++ b/examples/README.md @@ -8,6 +8,6 @@ data from projects with use cases for uncertain date logic. 
- [EDTF support](edtf-support.ipynb) - demonstrate and validate supported portions of the Extended Date Time Format (EDTF) specification (jupyter notebook) - [ISMI](ismi) - Sample data from the Islamic Scientific Manuscript Initiative project -- [Shakespeare and Company Project](shakespeare-and-company-project) - - Events data from version 1.2 of Shakespeare and Company Project datasets - - [Partial date duration logic](shxco_partial_date_durations.ipynb) - compare `undate` partial date range duration logic with a previous implementation in the _Shakespeare and Company Project_ (jupyter notebook) +- [Shakespeare and Company Project](shakespeare-and-company-project) - data, description of partial date implementation, and example notebook + - Events data from version 1.2 of Shakespeare and Company Project datasets + - [Partial date duration logic](shakespeare-and-company-project/shxco_partial_date_durations.ipynb) - compare `undate` partial date range duration logic with a previous implementation in the _Shakespeare and Company Project_ (jupyter notebook) From 58d2717cda950783f1b984155754c4354c4c98f8 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Mon, 31 Mar 2025 17:54:43 -0400 Subject: [PATCH 72/77] Set version to 0.4.0 final --- src/undate/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/undate/__init__.py b/src/undate/__init__.py index 00cedc3..0976d0e 100644 --- a/src/undate/__init__.py +++ b/src/undate/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.4.0.dev0" +__version__ = "0.4.0" from undate.date import DatePrecision from undate.undate import Undate, Calendar From 0677649dc6a40593dcf2e6d93312c09e3c916160 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Mon, 31 Mar 2025 18:03:54 -0400 Subject: [PATCH 73/77] Document changes since the last release --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 23ec981..80737d2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ # 
Change Log +## 0.4 + +- Make Undate Calendar aware / explicit; default is Gregorian + - Parsing and calendar conversion for Hebrew/Anno Mundi + - Parsing and calendar conversion for Islamic/Hijri + + ## 0.3.1 Update readthedocs config for current installation From 110f00b43196f26950082304666c4136991478e8 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 11 Apr 2025 10:28:04 -0400 Subject: [PATCH 74/77] Change project status from pre-alpha to alpha --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 6db5fde..2dc6515 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,7 @@ keywords = [ "digital-humanities", ] classifiers = [ - "Development Status :: 2 - Pre-Alpha", + "Development Status :: 3 - Alpha", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", From 4d0b488a7751847734b4138efcc5f9abd3ee57b2 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 11 Apr 2025 10:57:00 -0400 Subject: [PATCH 75/77] Document changes since last release --- CHANGELOG.md | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 80737d2..5b27b6e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,9 +2,24 @@ ## 0.4 -- Make Undate Calendar aware / explicit; default is Gregorian - - Parsing and calendar conversion for Hebrew/Anno Mundi - - Parsing and calendar conversion for Islamic/Hijri +- Undate is now Calendar aware / Calendar explicit; default is Gregorian + - New `BaseCalendarConverter` class, with additional methods required for calendar converters + - `HebrewDateConverter`: Parsing and calendar conversion for Hebrew/Anno Mundi + - `IslamicDateConverter`: Parsing and calendar conversion for Islamic/Hijri + - `GregorianDateConverter`: basic Gregorian calendar logic + - `undate.Calendar` class to track `Undate` object calendar, and match with calendar converters +- 
BaseDateConverter class now includes nested/descendant subclasses when looking + for available converters +- `Undate.to_undate` method to convert supported date objects to `Undate` (`datetime.date`, `datetime.datetime`, and internal `undate.date.Date` class) +- `UndateInterval` improvements + - Can be initialized with `Undate` objects or any type supported by `Undate.to_undate` + - New method for contains (`in`), to determine if another interval or date is contained by an interval + - New method `intersection` to determine the overlap between two `UndateInterval` objects +- EDTF parser: fixed day parsing for some unsupported cases +- Dropped support for Python 3.9 +- Reorganized examples folder to avoid unnecessary nesting + - ISMI data has been updated from older JSON data to examples in RDF (turtle) + - ## 0.3.1 @@ -21,7 +36,7 @@ Update readthedocs config for current installation - Support 5+ digit years with leading Y (thanks to numpy.datetime64) - Jupyter notebook demonstrating / validating EDTF support - Full support for Level 0 Date and Time Interval (no Date and Time support) - - Level 1: + - Level 1: - Letter-prefixed calendar year - Unspecified digit from the right - Partial support for extended interval From 04f8b6508104bde82188ea46c7d50e178a85b3cd Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 11 Apr 2025 10:57:24 -0400 Subject: [PATCH 76/77] Update documentation to match current functionality --- README.md | 14 +++++++------- src/undate/undate.py | 3 ++- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 23ee260..d1bc2e1 100644 --- a/README.md +++ b/README.md @@ -5,9 +5,9 @@ **undate** is a python library for working with uncertain or partially known dates. > [!WARNING] -> This is pre-alpha software and is **NOT** feature complete! Use with caution. -> Currently it only supports parsing and formatting dates in ISO8601 format and -> some portions of EDTF (Extended Date Time Format).
+> This is alpha software and is not yet feature complete! Use with caution and give us feedback. +> Currently `undate` supports parsing and formatting dates in ISO8601, some +portions of EDTF (Extended Date Time Format), and parsing and conversion for dates in Hebrew Anno Mundi and Islamic Hijri calendars *Undate was initially created as part of a [DH-Tech](https://dh-tech.github.io/) hackathon in November 2022.* @@ -167,9 +167,9 @@ Dates are stored with the year, month, day and appropriate precision for the ori >>> tammuz4816 = Undate.parse("26 Tammuz 4816", "Hebrew") >>> tammuz4816 ->>> rajab495 = Undate.parse("Rajab 495", "Hijri") +>>> rajab495 = Undate.parse("Rajab 495", "Islamic") >>> rajab495 - + >>> y2k = Undate.parse("2001", "EDTF") >>> y2k @@ -178,12 +178,12 @@ Dates are stored with the year, month, day and appropriate precision for the ori >>> [str(d.precision) for d in [rajab495, tammuz4816, y2k]] ['MONTH', 'DAY', 'YEAR'] >>> sorted([rajab495, tammuz4816, y2k]) -[, , ] +[, , ] ``` * * * -For more examples, refer to the [example notebooks](https://github.com/dh-tech/undate-python/tree/main/examples/notebooks/) included in this repository. +For more examples, refer to the code notebooks included in the [examples](https://github.com/dh-tech/undate-python/tree/main/examples/) in this repository. ## Documentation diff --git a/src/undate/undate.py b/src/undate/undate.py index d581eb7..244bf2d 100644 --- a/src/undate/undate.py +++ b/src/undate/undate.py @@ -368,8 +368,9 @@ def to_undate(cls, other: object) -> "Undate": """Convert arbitrary object to Undate, if possible. Raises TypeError if conversion is not possible. 
- Currently suppports: + Currently supports: - :class:`datetime.date` or :class:`datetime.datetime` + - :class:`undate.date.Date` """ match other: From 6098361fe4d0d2bbaa0a0b75c80912871d7ae2aa Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 11 Apr 2025 11:14:15 -0400 Subject: [PATCH 77/77] Minor cleanup and fixes suggested by coderabbitai --- CHANGELOG.md | 1 - README.md | 2 +- src/undate/interval.py | 4 +++- src/undate/undate.py | 16 +++++++--------- .../test_islamic/test_islamic_parser.py | 4 +++- tests/test_interval.py | 5 ++++- 6 files changed, 18 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5b27b6e..278df82 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,7 +19,6 @@ - Dropped support for Python 3.9 - Reorganized examples folder to avoid unnecessary nesting - ISMI data has been updated from older JSON data to examples in RDF (turtle) - - ## 0.3.1 diff --git a/README.md b/README.md index d1bc2e1..c6e3560 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ portions of EDTF (Extended Date Time Format), and parsing and conversion for dat *Undate was initially created as part of a [DH-Tech](https://dh-tech.github.io/) hackathon in November 2022.* ---- +* * * [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.11068867.svg)](https://doi.org/10.5281/zenodo.11068867) [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) diff --git a/src/undate/interval.py b/src/undate/interval.py index 96950cf..ddfacdb 100644 --- a/src/undate/interval.py +++ b/src/undate/interval.py @@ -94,7 +94,9 @@ def duration(self) -> Timedelta: # if range is open-ended, can't calculate if self.earliest is None or self.latest is None: - return NotImplemented + raise NotImplementedError( + "Cannot calculate duration for open-ended interval" + ) # if both years are known, subtract end of range from beginning of start if self.latest.known_year and self.earliest.known_year: diff --git 
a/src/undate/undate.py b/src/undate/undate.py index 244bf2d..be4454a 100644 --- a/src/undate/undate.py +++ b/src/undate/undate.py @@ -198,8 +198,8 @@ def set_calendar(self, calendar: Union[str, Calendar]): # look for calendar by upper-case name try: calendar = Calendar[calendar.upper()] - except KeyError: - raise ValueError(f"Calendar `{calendar}` is not supported") + except KeyError as err: + raise ValueError(f"Calendar `{calendar}` is not supported") from err self.calendar = calendar def __str__(self) -> str: @@ -292,15 +292,13 @@ def __eq__(self, other: object) -> bool: # in one format (i.e. X for missing digits). # If we support other formats, will need to normalize to common # internal format for comparison - if looks_equal: + if looks_equal and ( # if any part of either date that is known is _partially_ known, # then these dates are not equal - if any( - [self.is_partially_known(p) for p in self.initial_values.keys()] - ) or any( - [other.is_partially_known(p) for p in other.initial_values.keys()] - ): - return False + any([self.is_partially_known(p) for p in self.initial_values.keys()]) + or any([other.is_partially_known(p) for p in other.initial_values.keys()]) + ): + return False return looks_equal diff --git a/tests/test_converters/test_calendars/test_islamic/test_islamic_parser.py b/tests/test_converters/test_calendars/test_islamic/test_islamic_parser.py index 92ca94c..de4901e 100644 --- a/tests/test_converters/test_calendars/test_islamic/test_islamic_parser.py +++ b/tests/test_converters/test_calendars/test_islamic/test_islamic_parser.py @@ -1,4 +1,6 @@ import pytest +from lark.exceptions import LarkError + from undate.converters.calendars.islamic.parser import islamic_parser @@ -72,5 +74,5 @@ def test_should_parse(date_string): @pytest.mark.parametrize("date_string", error_cases) def test_should_error(date_string): - with pytest.raises(Exception): + with pytest.raises(LarkError): islamic_parser.parse(date_string) diff --git a/tests/test_interval.py 
b/tests/test_interval.py index 3101b2d..cf1a716 100644 --- a/tests/test_interval.py +++ b/tests/test_interval.py @@ -144,7 +144,10 @@ def test_duration(self): assert jan_march_duration.days == 2 # duration is not supported for open-ended intervals - assert UndateInterval(Undate(2000), None).duration() == NotImplemented + with pytest.raises( + NotImplementedError, match="Cannot calculate.*open-ended interval" + ): + assert UndateInterval(Undate(2000), None).duration() # one year set and the other not currently raises not implemented error with pytest.raises(NotImplementedError):