diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml index 3f3b4e7..ae450b4 100644 --- a/.github/workflows/check.yml +++ b/.github/workflows/check.yml @@ -17,23 +17,28 @@ jobs: uses: actions/setup-python@v5 with: python-version: "3.12" - cache: 'pip' - cache-dependency-path: '**/pyproject.toml' - - name: Install package with development dependencies - run: pip install -e ".[dev]" + + - name: Install uv + uses: astral-sh/setup-uv@v5 + with: + enable-cache: true + cache-dependency-glob: "pyproject.toml" + + - name: Install package with check dependencies + run: uv sync --extra check # check with ruff - name: Run ruff - run: ruff check + run: uv run ruff check # check docs build - name: Check that documentation builds with no errors or warnings - run: sphinx-build docs docs/_build --fail-on-warning + run: uv run sphinx-build docs docs/_build --fail-on-warning # check types with mypy - name: Check types in python src directory; install needed types - run: mypy --install-types --non-interactive src + run: uv run mypy --install-types --non-interactive src # use treon to make sure that example notebooks run - name: Check jupyter notebooks with treon - run: treon + run: uv run treon diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index 89df8cb..31e01ed 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -8,6 +8,8 @@ on: - 'undate/**' - 'tests/**' pull_request: + branches: + - "**" env: # python version used to calculate and submit code coverage @@ -18,30 +20,37 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python: ["3.9", "3.10", "3.11", "3.12", "3.13"] + python: ["3.10", "3.11", "3.12", "3.13"] defaults: run: working-directory: . 
steps: - - uses: actions/checkout@v3 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + - uses: actions/checkout@v4 + + # use github python action instead of uv to take advantage of caching + - name: Set up Python ${{ matrix.python }} + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python }} - cache: 'pip' - cache-dependency-path: '**/pyproject.toml' - - name: Install package with dependencies - run: pip install -e ".[test]" + + - name: Install uv + uses: astral-sh/setup-uv@v5 + with: + enable-cache: true + cache-dependency-glob: "pyproject.toml" + + - name: Install package with dev and test dependencies + run: uv sync --extra test # for all versions but the one we use for code coverage, run normally - name: Run unit tests normally - run: pytest + run: uv run pytest if: ${{ matrix.python != env.COV_PYTHON_VERSION }} # run code coverage in one version only - name: Run unit tests with code coverage reporting - run: pytest --cov=undate + run: uv run pytest --cov=undate if: ${{ matrix.python == env.COV_PYTHON_VERSION }} - name: Upload test coverage to Codecov uses: codecov/codecov-action@v3 diff --git a/CHANGELOG.md b/CHANGELOG.md index 23ec981..278df82 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,26 @@ # Change Log +## 0.4 + +- Undate is now Calendar aware / Calendar explicit; default is Gregorian + - New `BaseCalendarConverter` class, with additional methods required for calendar converters + - `HebrewDateConverter`: Parsing and calendar conversion for Hebrew/Anno Mundi + - `IslamicDateConverter`: Parsing and calendar conversion for Islamic/Hijri + - `GregorianDateConverter`: basic Gregorian calendar logic + - `undate.Calendar` class to track `Undate` object calendar, and match with calendar converters +- BaseDateConverter class now includes nested/descendant subclasses when looking + for available converters +- `Undate.to_undate` method to convert supported date objects to `Undate` (`datetime.date`, 
`datetime.datetime`, and internal `undate.date.Date` class) +- `UndateInterval` improvements + - Can be initialized with `Undate` objects or any type supported by `Undate.to_undate` + - New method for contains (`in`), to determine if another interval or date is contained by an interval + - New method `intersection` to determine the overlap between two `UndateInterval` objects +- EDTF parser : fixed day parsing for some unsupported cases +- Dropped support for Python 3.9 +- Reorganized examples folder to avoid unnecessary nesting + - ISMI data has been updated from older JSON data to examples in RDF (turtle) + + ## 0.3.1 Update readthedocs config for current installation @@ -14,7 +35,7 @@ Update readthedocs config for current installation - Support 5+ digit years with leading Y (thanks to numpy.datetime64) - Jupyter notebook demonstrating / validating EDTF support - Full support for Level 0 Date and Time Interval (no Date and Time support) - - Level 1: + - Level 1: - Letter-prefixed calendar year - Unspecified digit from the right - Partial support for extended interval diff --git a/README.md b/README.md index 37b8452..c6e3560 100644 --- a/README.md +++ b/README.md @@ -5,13 +5,13 @@ **undate** is a python library for working with uncertain or partially known dates. > [!WARNING] -> This is pre-alpha software and is **NOT** feature complete! Use with caution. -> Currently it only supports parsing and formatting dates in ISO8601 format and -> some portions of EDTF (Extended Date Time Format). +> This is alpha software and is not yet feature complete! Use with caution and give us feedback. 
+> Currently `undate` supports parsing and formatting dates in ISO8601, some +> portions of EDTF (Extended Date Time Format), and parsing and conversion for dates in Hebrew Anno Mundi and Islamic Hijri calendars. *Undate was initially created as part of a [DH-Tech](https://dh-tech.github.io/) hackathon in November 2022.* ---- +* * * [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.11068867.svg)](https://doi.org/10.5281/zenodo.11068867) [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) @@ -140,7 +140,7 @@ An `UndateInterval` is a date range between two `Undate` objects. Intervals can ``` You can initialize `Undate` or `UndateInterval` objects by parsing a date string with a specific converter, and you can also output an `Undate` object in those formats. -Available converters are "ISO8601" and "EDTF" (but only) +Currently available converters are "ISO8601", "EDTF", and converters for the supported calendars. ```python >>> from undate import Undate @@ -156,7 +156,34 @@ Available converters are "ISO8601" and "EDTF" (but only) ``` -For more examples, refer to the [example notebooks](https://github.com/dh-tech/undate-python/tree/main/examples/notebooks/) included in this repository. +### Calendars + +All `Undate` objects are calendar aware, and date converters include support for parsing and working with dates from other calendars. The Gregorian calendar is used by default; currently `undate` supports the Islamic Hijri calendar and the Hebrew Anno Mundi calendar based on calendar conversion logic implemented in the [convertdate](https://convertdate.readthedocs.io/en/latest/) package. + +Dates are stored with the year, month, day and appropriate precision for the original calendar; internally, earliest and latest dates are calculated in Gregorian / Proleptic Gregorian calendar for standardized comparison across dates from different calendars. 
+ +```python +>>> from undate import Undate +>>> tammuz4816 = Undate.parse("26 Tammuz 4816", "Hebrew") +>>> tammuz4816 + +>>> rajab495 = Undate.parse("Rajab 495", "Islamic") +>>> rajab495 + +>>> y2k = Undate.parse("2001", "EDTF") +>>> y2k + +>>> [str(d.earliest) for d in [rajab495, tammuz4816, y2k]] +['1102-04-28', '1056-07-17', '2001-01-01'] +>>> [str(d.precision) for d in [rajab495, tammuz4816, y2k]] +['MONTH', 'DAY', 'YEAR'] +>>> sorted([rajab495, tammuz4816, y2k]) +[, , ] +``` + +* * * + +For more examples, refer to the code notebooks included in the [examples](https://github.com/dh-tech/undate-python/tree/main/examples/) in this repository. ## Documentation diff --git a/docs/undate/converters.rst b/docs/undate/converters.rst index 701aaf1..b93b81e 100644 --- a/docs/undate/converters.rst +++ b/docs/undate/converters.rst @@ -1,19 +1,25 @@ Converters ========== +Overview +-------- + .. automodule:: undate.converters.base :members: :undoc-members: +Formats +-------- + ISO8601 -------- +^^^^^^^ .. automodule:: undate.converters.iso8601 :members: :undoc-members: Extended Date-Time Format (EDTF) --------------------------------- +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. automodule:: undate.converters.edtf.converter :members: @@ -23,8 +29,25 @@ Extended Date-Time Format (EDTF) :members: :undoc-members: -.. transformer is more of an internal, probably doesn't make sense to include -.. .. automodule:: undate.converters.edtf.transformer -.. :members: -.. :undoc-members: + +Calendars +--------- + +Gregorian +^^^^^^^^^ + +.. automodule:: undate.converters.calendars.gregorian + :members: + +Hebrew Anno Mundi calendar +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. automodule:: undate.converters.calendars.hebrew.converter + :members: + +Islamic Hijri calendar +^^^^^^^^^^^^^^^^^^^^^^^^ + +.. 
automodule:: undate.converters.calendars.islamic.converter + :members: diff --git a/docs/undate/core.rst b/docs/undate/core.rst index e7b6b4b..4cc3e6b 100644 --- a/docs/undate/core.rst +++ b/docs/undate/core.rst @@ -1,13 +1,16 @@ Undate objects ============== -undates and undate intervals +dates, intervals, and calendar ------------------------------ .. autoclass:: undate.undate.Undate :members: -.. autoclass:: undate.undate.UndateInterval +.. autoclass:: undate.undate.Calendar + :members: + +.. autoclass:: undate.interval.UndateInterval :members: date, timedelta, and date precision diff --git a/examples/README.md b/examples/README.md index bf2a99c..080a6ce 100644 --- a/examples/README.md +++ b/examples/README.md @@ -1,7 +1,13 @@ # undate examples -Example data and projects with use cases for uncertain date logic and -example code notebooks using undate. - -- [use cases](use-cases) - examples from projects or specific data with use cases for undate -- [notebooks](notebooks) - code notebooks showing how undate can be used on a specific dataset or for a specific problem +This folder contains code notebooks demonstrating how `undate` can be +used on a specific dataset, problem, or format, as well as example +data from projects with use cases for uncertain date logic. 
+ +## Contents + +- [EDTF support](edtf-support.ipynb) - demonstrate and validate supported portions of the Extended Date Time Format (EDTF) specification (jupyter notebook) +- [ISMI](ismi) - Sample data from the Islamic Scientific Manuscript Initiative project +- [Shakespeare and Company Project](shakespeare-and-company-project) - data, description of partial date implementation, and example notebook + - Events data from version 1.2 of Shakespeare and Company Project datasets + - [Partial date duration logic](shakespeare-and-company-project/shxco_partial_date_durations.ipynb) - compare `undate` partial date range duration logic with a previous implementation in the _Shakespeare and Company Project_ (jupyter notebook) diff --git a/examples/notebooks/edtf-support.ipynb b/examples/edtf-support.ipynb similarity index 99% rename from examples/notebooks/edtf-support.ipynb rename to examples/edtf-support.ipynb index 0295647..a604838 100644 --- a/examples/notebooks/edtf-support.ipynb +++ b/examples/edtf-support.ipynb @@ -55,7 +55,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "id": "9c6b7379-b2a7-4ec1-afa5-2cd9832c8a5d", "metadata": {}, "outputs": [], @@ -92,7 +92,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "id": "923476ff-344a-4018-a02e-6e5f80ea76a8", "metadata": {}, "outputs": [], @@ -159,7 +159,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "id": "6ed422de-34a2-4324-b254-f62db00563f7", "metadata": {}, "outputs": [], @@ -212,7 +212,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "id": "8d98a139-627b-40bd-b1c5-d0028e538a53", "metadata": {}, "outputs": [], @@ -255,7 +255,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "id": "532470db-851e-4f91-9242-cd93d35054cf", "metadata": {}, "outputs": [], @@ -320,7 +320,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "id": 
"a5abd0e4-0b26-49b0-bf78-3e1fe6c046d8", "metadata": {}, "outputs": [], @@ -425,7 +425,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "id": "e47f3fff-d35c-4c2e-9568-214763f6511a", "metadata": {}, "outputs": [], @@ -481,7 +481,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "id": "39143c1f-932a-450c-9b2d-ffbe3e1416b0", "metadata": {}, "outputs": [], @@ -535,7 +535,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "id": "95965f17-0bd5-446f-bc09-9503eaed68e2", "metadata": {}, "outputs": [], @@ -589,7 +589,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "id": "c6c2d1a1-39f1-45eb-ac08-1de4fadbe842", "metadata": {}, "outputs": [], @@ -640,7 +640,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 11, "id": "f24fd31a-176a-40b5-bff4-d72b68f32a18", "metadata": {}, "outputs": [], @@ -688,7 +688,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 12, "id": "5910caab-eada-4715-b863-9bbbb15b9c5c", "metadata": {}, "outputs": [], diff --git a/examples/ismi/README.md b/examples/ismi/README.md new file mode 100644 index 0000000..e18c797 --- /dev/null +++ b/examples/ismi/README.md @@ -0,0 +1,22 @@ +# Sample data from the ISMI project database + +The [Islamic Scientific Manuscript Initiative project](https://ismi.mpwig-berlin.mpg.de) aims to collect information about all Islamic Manuscripts in the exact sciences from the 9th to the 19th centuries CE. 
+ +The new [ISMI database](https://gitlab.gwdg.de/MPIWG/Department-II/ismi-project) stores historical dates as CIDOC-CRM RDF objects with the following structure: + +- `crm:E52_Time-Span` + - `crm:P2_has_type`: date type + - "datetype:day" + - `crm:P82_at_some_time_within`: given day (xsd:date) + - "datetype:year" + - `crm:P82a_begin_of_the_begin`: first day (xsd:date), `crm:P82b_end_of_the_end`: last day of year (xsd:date) + - "datetype:range" + - `crm:P82a_begin_of_the_begin`: first day (xsd:date), `crm:P82b_end_of_the_end`: last day of range (xsd:date) + - `crm:P1_is_identified_by` + - `crm:E41_Appellation` + - `rdfs:label`: textual representation of timespan (e.g. "901 Rabīʿ I 14 (islamic)") + - `crm:P2_has_type`: calendar type (calendar the date was entered in) + - "calendartype:julian", "calendartype:islamic", "calendartype:gregorian" + - `crm:P3_has_note`: textual note with additional information + +A sample file with dates of each type can be found in `data/ismi-crm-date-samples.ttl` diff --git a/examples/ismi/data/ismi-crm-date-samples.ttl b/examples/ismi/data/ismi-crm-date-samples.ttl new file mode 100644 index 0000000..4c5a115 --- /dev/null +++ b/examples/ismi/data/ismi-crm-date-samples.ttl @@ -0,0 +1,104 @@ +@prefix rdfs: . +@prefix crm: . +@prefix xsd: . +# prefix for date and calendar type URIs +@prefix datetype: . +@prefix calendartype: . +# prefix for sample data +@prefix : . + +# day-precision date in islamic calendar +:date1 a crm:E52_Time-Span ; + crm:P2_has_type datetype:day ; + crm:P82_at_some_time_within "1495-12-11"^^xsd:date ; + crm:P3_has_note "day-precision date in islamic calendar" ; + crm:P1_is_identified_by :date1-label . +:date1-label a crm:E41_Appellation ; + crm:P2_has_type calendartype:islamic ; + rdfs:label "901 Rabīʿ I 14 (islamic)" . 
+ +# year-precision date in islamic calendar +:date2 a crm:E52_Time-Span ; + crm:P2_has_type datetype:year ; + crm:P82a_begin_of_the_begin "1479-04-03"^^xsd:date ; + crm:P82b_end_of_the_end "1480-03-21"^^xsd:date ; + crm:P3_has_note "year-precision date in islamic calendar" ; + crm:P1_is_identified_by :date2-label . +:date2-label a crm:E41_Appellation ; + crm:P2_has_type calendartype:islamic ; + rdfs:label "884 (islamic)" . + +# range-type (century in islamic calendar) date in islamic calendar +:date3 a crm:E52_Time-Span ; + crm:P2_has_type datetype:range ; + crm:P82a_begin_of_the_begin "1494-10-11"^^xsd:date ; + crm:P82b_end_of_the_end "1591-10-18"^^xsd:date ; + crm:P3_has_note "range-type (century in islamic calendar) date in islamic calendar" ; + crm:P1_is_identified_by :date3-label . +:date3-label a crm:E41_Appellation ; + crm:P2_has_type calendartype:islamic ; + rdfs:label "900 Muḥarram 1 - 999 Ḏu al-Ḥijjaẗ 29 (islamic)" . + +# day-precision date in gregorian calendar +:date4 a crm:E52_Time-Span ; + crm:P2_has_type datetype:day ; + crm:P82_at_some_time_within "1830-02-08"^^xsd:date ; + crm:P3_has_note "day-precision date in gregorian calendar" ; + crm:P1_is_identified_by :date4-label . +:date4-label a crm:E41_Appellation ; + crm:P2_has_type calendartype:gregorian ; + rdfs:label "1830 February 8 (gregorian)" . + +# year-precision date in gregorian calendar +:date5 a crm:E52_Time-Span ; + crm:P2_has_type datetype:year ; + crm:P82a_begin_of_the_begin "1796-01-01"^^xsd:date ; + crm:P82b_end_of_the_end "1796-12-31"^^xsd:date ; + crm:P3_has_note "year-precision date in gregorian calendar" ; + crm:P1_is_identified_by :date5-label . +:date5-label a crm:E41_Appellation ; + crm:P2_has_type calendartype:gregorian ; + rdfs:label "1796 (gregorian)" . 
+ +# range-type (century in gregorian calendar) date in gregorian calendar +:date6 a crm:E52_Time-Span ; + crm:P2_has_type datetype:range ; + crm:P82a_begin_of_the_begin "1600-01-01"^^xsd:date ; + crm:P82b_end_of_the_end "1699-12-31"^^xsd:date ; + crm:P3_has_note "range-type (century in gregorian calendar) date in gregorian calendar" ; + crm:P1_is_identified_by :date6-label . +:date6-label a crm:E41_Appellation ; + crm:P2_has_type calendartype:gregorian ; + rdfs:label "1600 January 1 - 1699 December 31 (gregorian)" . + +# day-precision date in julian calendar +:date7 a crm:E52_Time-Span ; + crm:P2_has_type datetype:day ; + crm:P82_at_some_time_within "1035-06-04"^^xsd:date ; + crm:P3_has_note "day-precision date in julian calendar" ; + crm:P1_is_identified_by :date7-label . +:date7-label a crm:E41_Appellation ; + crm:P2_has_type calendartype:julian ; + rdfs:label "1035 May 29 (julian)" . + +# year-precision date in julian calendar +:date8 a crm:E52_Time-Span ; + crm:P2_has_type datetype:year ; + crm:P82a_begin_of_the_begin "1013-01-07"^^xsd:date ; + crm:P82b_end_of_the_end "1014-01-06"^^xsd:date ; + crm:P3_has_note "year-precision date in julian calendar" ; + crm:P1_is_identified_by :date8-label . +:date8-label a crm:E41_Appellation ; + crm:P2_has_type calendartype:julian ; + rdfs:label "1013 (julian)" . + +# range-type (century in julian calendar) date in julian calendar +:date9 a crm:E52_Time-Span ; + crm:P2_has_type datetype:range ; + crm:P82a_begin_of_the_begin "1200-01-08"^^xsd:date ; + crm:P82b_end_of_the_end "1300-01-07"^^xsd:date ; + crm:P3_has_note "range-type (century in julian calendar) date in julian calendar" ; + crm:P1_is_identified_by :date9-label . +:date9-label a crm:E41_Appellation ; + crm:P2_has_type calendartype:julian ; + rdfs:label "1200 January 1 - 1299 December 31 (julian)" . 
diff --git a/examples/notebooks/README.md b/examples/notebooks/README.md deleted file mode 100644 index 17c1270..0000000 --- a/examples/notebooks/README.md +++ /dev/null @@ -1,6 +0,0 @@ -# `undate` demo notebooks - -This folder contains code notebooks demonstrating how undate can be used on a specific dataset or for a specific problem. - -- [EDTF support](edtf-support.ipynb) - demonstrate and validate supported portions of the Extended Date Time Format (EDTF) specification -- [Partial date duration logic](shxco_partial_date_durations.ipynb) - compare `undate` partial date range duration logic with a previous implementation in the _Shakespeare and Company Project_ diff --git a/examples/use-cases/shakespeare-and-company-project/README.md b/examples/shakespeare-and-company-project/README.md similarity index 100% rename from examples/use-cases/shakespeare-and-company-project/README.md rename to examples/shakespeare-and-company-project/README.md diff --git a/examples/use-cases/shakespeare-and-company-project/SCoData_events_v1.2_2022-01.csv b/examples/shakespeare-and-company-project/SCoData_events_v1.2_2022-01.csv similarity index 100% rename from examples/use-cases/shakespeare-and-company-project/SCoData_events_v1.2_2022-01.csv rename to examples/shakespeare-and-company-project/SCoData_events_v1.2_2022-01.csv diff --git a/examples/notebooks/shxco_partial_date_durations.ipynb b/examples/shakespeare-and-company-project/shxco_partial_date_durations.ipynb similarity index 99% rename from examples/notebooks/shxco_partial_date_durations.ipynb rename to examples/shakespeare-and-company-project/shxco_partial_date_durations.ipynb index 9e291f9..38efa6c 100644 --- a/examples/notebooks/shxco_partial_date_durations.ipynb +++ b/examples/shakespeare-and-company-project/shxco_partial_date_durations.ipynb @@ -21,7 +21,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [ { @@ -29,7 +29,7 @@ "output_type": "stream", "text": [ "\n", - 
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.0.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49m/Users/rkoeser/workarea/env/undate/bin/python3 -m pip install --upgrade pip\u001b[0m\n", "Note: you may need to restart the kernel to use updated packages.\n" ] @@ -41,7 +41,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -288,7 +288,7 @@ "[5 rows x 28 columns]" ] }, - "execution_count": 3, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -296,8 +296,8 @@ "source": [ "import pandas as pd\n", "\n", - "# load the 1.2 version of S&co events dataset; we have a copy in our use-cases folder\n", - "events_df = pd.read_csv(\"../use-cases/shakespeare-and-company-project/SCoData_events_v1.2_2022-01.csv\", low_memory=False)\n", + "# load the 1.2 version of S&co events dataset; expected to be in the same folder as this notebook\n", + "events_df = pd.read_csv(\"./SCoData_events_v1.2_2022-01.csv\", low_memory=False)\n", "events_df.head()" ] }, @@ -316,14 +316,14 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": { "id": "y_MqgrQW64uI" }, "outputs": [], "source": [ + "from undate import UndateInterval\n", "from undate.date import ONE_DAY\n", - "from undate.undate import UndateInterval\n", "from undate.converters.iso8601 import ISO8601DateFormat\n", "\n", "def undate_duration(start_date, end_date):\n", @@ -353,7 +353,7 @@ }, { "cell_type": "code", - "execution_count": 5, + 
"execution_count": 4, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -452,7 +452,7 @@ "260 4 months 122.0 " ] }, - "execution_count": 5, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -478,7 +478,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -505,7 +505,7 @@ "Name: count, Length: 133, dtype: int64" ] }, - "execution_count": 6, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -517,7 +517,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -540,7 +540,7 @@ "Name: subscription_duration_days, dtype: float64" ] }, - "execution_count": 7, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -558,7 +558,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -607,7 +607,7 @@ "Index: []" ] }, - "execution_count": 8, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -619,7 +619,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -688,7 +688,7 @@ "13686 NaN 31.0 " ] }, - "execution_count": 9, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -707,7 +707,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "metadata": { "id": "jwvN9-CgLQRx" }, @@ -727,7 +727,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -832,7 +832,7 @@ "260 4 months 122.0 152 days " ] }, - "execution_count": 11, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -845,7 +845,7 @@ }, { "cell_type": "code", - "execution_count": 12, + 
"execution_count": 11, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -950,7 +950,7 @@ "260 4 months 122.0 152 days " ] }, - "execution_count": 12, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -962,7 +962,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 12, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1161,7 +1161,7 @@ "[9144 rows x 7 columns]" ] }, - "execution_count": 13, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -1174,7 +1174,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 13, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1200,7 +1200,7 @@ "Name: count, dtype: int64" ] }, - "execution_count": 14, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -1220,7 +1220,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 14, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1403,7 +1403,7 @@ "313 30.0 " ] }, - "execution_count": 15, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -1416,7 +1416,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 15, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1443,7 +1443,7 @@ "Name: count, dtype: int64" ] }, - "execution_count": 16, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -1455,7 +1455,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 16, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1686,7 +1686,7 @@ "472 30.0 60 days 30.0 " ] }, - "execution_count": 17, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -1709,7 +1709,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 17, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1957,7 +1957,7 @@ "415 29.0 " 
] }, - "execution_count": 18, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -1982,7 +1982,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 18, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -2068,7 +2068,7 @@ "606 G. E. Pulsford --01-20 --01-28 8.0" ] }, - "execution_count": 19, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -2082,7 +2082,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 19, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -2168,7 +2168,7 @@ "29908 Ann Samyn 1961-10-04 1962-03-21 168.0" ] }, - "execution_count": 20, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -2179,7 +2179,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 20, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -2316,7 +2316,7 @@ "611 Gertrude Stein --01-24 --05-30 126.0 126 days" ] }, - "execution_count": 21, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -2329,7 +2329,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 21, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -2489,7 +2489,7 @@ "611 0.0 " ] }, - "execution_count": 22, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -2502,7 +2502,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 22, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -2519,7 +2519,7 @@ "Name: count, dtype: int64" ] }, - "execution_count": 23, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -2551,7 +2551,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 23, "metadata": { "colab": { "base_uri": "https://localhost:8080/", diff --git a/examples/use-cases/ismi/README.md b/examples/use-cases/ismi/README.md deleted file mode 100644 
index 2340bc9..0000000 --- a/examples/use-cases/ismi/README.md +++ /dev/null @@ -1,24 +0,0 @@ -# Sample data from the ISMI project database - -The [Islamic Scientific Manuscript Initiative project](https://ismi.mpwig-berlin.mpg.de) aims to collect information about all Islamic Manuscripts in the exact sciences from the 9th to the 19th centuries CE. - -The old [ISMI database](https://gitlab.gwdg.de/MPIWG/Department-II/ismi-project) database OpenMind (OM4) stores historical dates as JSON objects with the following structure: - -- `state` - - "unknown": no date - - "not checked": unparsed date in `date_in_text` - - "known": date or date range entered in specified calendar - - `calendar_type`: calendar the date was entered in - - "Julian", "Islamic", "Gregorian" - - `input_form`: date type - - "Year" - - `from`: first day, `until`: last day of year (dates in Gregorian calendar, ambiguity of +-2 days when entered in Islamic calendar) - - "Range" - - `from`: first day, `until`: last day of range (dates in Gregorian calendar, ambiguity of +-2 days when entered in Islamic calendar) - - "Date" - - `date`: given day (date in Gregorian calendar, ambiguity of +-2 days when entered in Islamic calendar) - - `additional_info`: textual note with additional information - -We plan to extract as much as possible of this data in the migration to the new RDF database with a CIDOC-CRM-based data model. 
- -A sample file with dates of each type can be found in `data/ismi-om4-date-samples.json` diff --git a/examples/use-cases/ismi/data/ismi-om4-date-samples.json b/examples/use-cases/ismi/data/ismi-om4-date-samples.json deleted file mode 100644 index 4300195..0000000 --- a/examples/use-cases/ismi/data/ismi-om4-date-samples.json +++ /dev/null @@ -1,186 +0,0 @@ -[ - { - "state": "unknown" - }, - { - "date_in_text": "8th/14th century", - "state": "not checked" - }, - { - "additional_info": "year 1233 in Julian calendar", - "calendar_type": "Julian", - "from": { - "ambiguity": 0, - "century": 13, - "dayOfMonth": 8, - "dayOfYear": 8, - "month": 1, - "year": 1232 - }, - "input_form": "Year", - "state": "known", - "until": { - "ambiguity": 0, - "century": 13, - "dayOfMonth": 7, - "dayOfYear": 7, - "month": 1, - "year": 1233 - }, - "year": 1232 - }, - { - "additional_info": "year 1205 in Islamic calendar", - "calendar_type": "Islamic", - "from": { - "ambiguity": 2, - "century": 18, - "dayOfMonth": 10, - "dayOfYear": 253, - "month": 9, - "year": 1790 - }, - "input_form": "Year", - "state": "known", - "until": { - "ambiguity": 2, - "century": 18, - "dayOfMonth": 29, - "dayOfYear": 241, - "month": 8, - "year": 1791 - }, - "year": 1205 - }, - { - "additional_info": "year 1564 in Gregorian calendar", - "calendar_type": "Gregorian", - "from": { - "ambiguity": 0, - "century": 16, - "dayOfMonth": 1, - "dayOfYear": 1, - "month": 1, - "year": 1564 - }, - "input_form": "Year", - "state": "known", - "until": { - "ambiguity": 0, - "century": 16, - "dayOfMonth": 31, - "dayOfYear": 366, - "month": 12, - "year": 1564 - }, - "year": 1564 - }, - { - "additional_info": "3. Martius(3) 1481 (1481-03-03) in Julian calendar (1481-03-12 Gregorian)", - "calendar_type": "Julian", - "date": { - "ambiguity": 0, - "century": 15, - "dayOfMonth": 12, - "dayOfYear": 71, - "month": 3, - "year": 1481 - }, - "input_form": "Date", - "state": "known" - }, - { - "additional_info": "6. 
Muḥarram(1) 888 in Islamic calendar (1483-02-23[+-2] Gregorian)", - "calendar_type": "Islamic", - "date": { - "ambiguity": 2, - "century": 15, - "dayOfMonth": 23, - "dayOfYear": 54, - "month": 2, - "year": 1483 - }, - "input_form": "Date", - "state": "known" - }, - { - "additional_info": "1. September(9) 1621 in Gregorian calendar", - "calendar_type": "Gregorian", - "date": { - "ambiguity": 0, - "century": 17, - "dayOfMonth": 1, - "dayOfYear": 244, - "month": 9, - "year": 1621 - }, - "input_form": "Date", - "state": "known" - }, - { - "additional_info": "1. Ianuarius(1) 811 - 31. December(12) 811 in Julian calendar", - "calendar_type": "Julian", - "from": { - "ambiguity": 0, - "century": 9, - "dayOfMonth": 5, - "dayOfYear": 5, - "month": 1, - "year": 811 - }, - "input_form": "Range", - "state": "known", - "until": { - "ambiguity": 0, - "century": 9, - "dayOfMonth": 4, - "dayOfYear": 4, - "month": 1, - "year": 812 - } - }, - { - "additional_info": "1. Muḥarram(1) 1000 - 29. Ḏu al-Ḥijjaẗ(12) 1024 in Islamic calendar", - "calendar_type": "Islamic", - "from": { - "ambiguity": 2, - "century": 16, - "dayOfMonth": 19, - "dayOfYear": 292, - "month": 10, - "year": 1591 - }, - "input_form": "Range", - "state": "known", - "until": { - "ambiguity": 2, - "century": 17, - "dayOfMonth": 19, - "dayOfYear": 19, - "month": 1, - "year": 1616 - } - }, - { - "additional_info": "1650-01-01 - 1699-01-01 in Gregorian calendar", - "calendar_type": "Gregorian", - "from": { - "ambiguity": 0, - "century": 17, - "dayOfMonth": 1, - "dayOfYear": 1, - "month": 1, - "year": 1650 - }, - "input_form": "Range", - "state": "known", - "until": { - "ambiguity": 0, - "century": 17, - "dayOfMonth": 1, - "dayOfYear": 1, - "month": 1, - "year": 1699 - } - } -] diff --git a/pyproject.toml b/pyproject.toml index 9179ca0..2dc6515 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,9 +7,9 @@ name = "undate" description = "library for working with uncertain, fuzzy, or partially unknown dates and date 
intervals" readme = "README.md" license = { text = "Apache-2" } -requires-python = ">= 3.9" +requires-python = ">= 3.10" dynamic = ["version"] -dependencies = ["lark", "numpy"] +dependencies = ["lark[interegular]", "numpy", "convertdate", "strenum; python_version < '3.11'"] authors = [ { name = "Rebecca Sutton Koeser" }, { name = "Cole Crawford" }, @@ -29,9 +29,8 @@ keywords = [ "digital-humanities", ] classifiers = [ - "Development Status :: 2 - Pre-Alpha", + "Development Status :: 3 - Alpha", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", @@ -50,14 +49,14 @@ classifiers = [ [project.optional-dependencies] docs = ["sphinx>=7.0.0", "alabaster", "myst-parser", "myst-parser[linkify]"] test = ["pytest>=7.2", "pytest-ordering", "pytest-cov"] +notebooks = ["jupyterlab", "pandas", "treon"] +check = ["undate[docs]", "undate[notebooks]", "mypy", "ruff"] dev = [ - "ruff", "pre-commit>=2.20.0", "twine", "wheel", "build", - "mypy", - "treon", + "undate[check]", "undate[docs]", "undate[test]", ] diff --git a/src/undate/__init__.py b/src/undate/__init__.py index c8d79a0..0976d0e 100644 --- a/src/undate/__init__.py +++ b/src/undate/__init__.py @@ -1,6 +1,7 @@ -__version__ = "0.3.1" +__version__ = "0.4.0" from undate.date import DatePrecision -from undate.undate import Undate, UndateInterval +from undate.undate import Undate, Calendar +from undate.interval import UndateInterval -__all__ = ["Undate", "UndateInterval", "DatePrecision", "__version__"] +__all__ = ["Undate", "UndateInterval", "Calendar", "DatePrecision", "__version__"] diff --git a/src/undate/converters/base.py b/src/undate/converters/base.py index 02cf820..04db129 100644 --- a/src/undate/converters/base.py +++ b/src/undate/converters/base.py @@ -1,10 +1,11 @@ """ -:class:`undate.converters.BaseDateConverter` provides a base class for 
+:class:`~undate.converters.BaseDateConverter` provides a base class for implementing date converters, which can provide support for -parsing and generating dates in different formats and also converting -dates between different calendars. +parsing and generating dates in different formats. +The converter subclass :class:`undate.converters.BaseCalendarConverter` +provides additional functionality needed for calendar conversion. -To add support for a new date format or calendar conversion: +To add support for a new date converter: - Create a new file under ``undate/converters/`` - For converters with sufficient complexity, you may want to create a submodule; @@ -18,6 +19,26 @@ The new subclass should be loaded automatically and included in the converters returned by :meth:`BaseDateConverter.available_converters` +To add support for a new calendar converter: + +- Create a new file under ``undate/converters/calendars/`` + - For converters with sufficient complexity, you may want to create a submodule; + see ``undate.converters.calendars.islamic`` for an example. +- Extend ``BaseCalendarConverter`` and implement ``parse`` and ``to_string`` + formatter methods as desired/appropriate for your converter as well as the + additional methods for ``max_month``, ``max_day``, and conversion ``to_gregorian`` + calendar. +- Import your calendar in ``undate/converters/calendars/__init__.py`` and include in ``__all__`` +- Add unit tests for the new calendar logic under ``tests/test_converters/calendars/`` +- Add the new calendar to the ``Calendar`` enum of supported calendars in + ``undate/undate.py`` and confirm that the ``get_converter`` method loads your + calendar converter correctly (an existing unit test should cover this). +- Consider creating a notebook to demonstrate the use of the calendar + converter. + +Calendar converter subclasses are also automatically loaded and included +in the list of available converters.
+ ------------------- """ @@ -90,6 +111,59 @@ def available_converters(cls) -> Dict[str, Type["BaseDateConverter"]]: """ Dictionary of available converters keyed on name. """ + return {c.name: c for c in cls.subclasses()} # type: ignore + + @classmethod + def subclasses(cls) -> set[Type["BaseDateConverter"]]: + """ + Set of available converters classes. Includes descendant + subclasses, including calendar converters, but does not include + :class:`BaseCalendarConverter`. + """ # ensure undate converters are imported cls.import_converters() - return {c.name: c for c in cls.__subclasses__()} # type: ignore + + # find all direct subclasses, excluding base calendar converter + direct_subclasses = cls.__subclasses__() + all_subclasses = set(direct_subclasses) + # recurse to find nested subclasses + for subc in direct_subclasses: + all_subclasses |= subc.subclasses() + + # omit the calendar converter base class, which is not itself a converter + all_subclasses -= {BaseCalendarConverter} + return all_subclasses + + +class BaseCalendarConverter(BaseDateConverter): + """Base class for calendar converters, with additional methods required + for calendars.""" + + #: Converter name. Subclasses must define a unique name. 
+ name: str = "Base Calendar Converter" + + def min_month(self) -> int: + """Smallest numeric month for this calendar.""" + raise NotImplementedError + + def max_month(self, year: int) -> int: + """Maximum numeric month for this calendar""" + raise NotImplementedError + + def first_month(self) -> int: + """first month in this calendar; by default, returns :meth:`min_month`.""" + return self.min_month() + + def last_month(self, year: int) -> int: + """last month in this calendar; by default, returns :meth:`max_month`.""" + return self.max_month(year) + + def max_day(self, year: int, month: int) -> int: + """maximum numeric day for the specified year and month in this calendar""" + raise NotImplementedError + + def to_gregorian(self, year, month, day) -> tuple[int, int, int]: + """Convert a date for this calendar specified by numeric year, month, and day, + into the Gregorian equivalent date. Should return a tuple of year, month, day. + """ + raise NotImplementedError diff --git a/src/undate/converters/calendars/__init__.py b/src/undate/converters/calendars/__init__.py new file mode 100644 index 0000000..a43a270 --- /dev/null +++ b/src/undate/converters/calendars/__init__.py @@ -0,0 +1,5 @@ +from undate.converters.calendars.gregorian import GregorianDateConverter +from undate.converters.calendars.hebrew import HebrewDateConverter +from undate.converters.calendars.islamic import IslamicDateConverter + +__all__ = ["GregorianDateConverter", "HebrewDateConverter", "IslamicDateConverter"] diff --git a/src/undate/converters/calendars/gregorian.py b/src/undate/converters/calendars/gregorian.py new file mode 100644 index 0000000..5a1d2dc --- /dev/null +++ b/src/undate/converters/calendars/gregorian.py @@ -0,0 +1,51 @@ +from calendar import monthrange + +from undate.converters.base import BaseCalendarConverter + + +class GregorianDateConverter(BaseCalendarConverter): + """ + Calendar converter class for Gregorian calendar. 
+ """ + + #: converter name: Gregorian + name: str = "Gregorian" + #: calendar + calendar_name: str = "Gregorian" + + #: known non-leap year + NON_LEAP_YEAR: int = 2022 + + def min_month(self) -> int: + """First month for the Gregorian calendar.""" + return 1 + + def max_month(self, year: int) -> int: + """maximum numeric month for the specified year in the Gregorian calendar""" + return 12 + + def max_day(self, year: int, month: int) -> int: + """maximum numeric day for the specified year and month in this calendar""" + # if month is known, use that to calculate + if month: + # if year is known, use it; otherwise use a known non-leap year + # (only matters for February) + year = year or self.NON_LEAP_YEAR + + # Use monthrange from python builtin calendar module. + # returns first day of the month and number of days in the month + # for the specified year and month. + _, max_day = monthrange(year, month) + else: + # if year and month are unknown, return maximum possible + max_day = 31 + + return max_day + + def to_gregorian(self, year, month, day) -> tuple[int, int, int]: + """Convert to Gregorian date. This returns the specified year, month, + and day unchanged, but is provided for consistency since all calendar + converters need to support conversion to Gregorian calendar for + a common point of comparison.
+ """ + return (year, month, day) diff --git a/src/undate/converters/calendars/hebrew/__init__.py b/src/undate/converters/calendars/hebrew/__init__.py new file mode 100644 index 0000000..e612ce3 --- /dev/null +++ b/src/undate/converters/calendars/hebrew/__init__.py @@ -0,0 +1,3 @@ +from undate.converters.calendars.hebrew.converter import HebrewDateConverter + +__all__ = ["HebrewDateConverter"] diff --git a/src/undate/converters/calendars/hebrew/converter.py b/src/undate/converters/calendars/hebrew/converter.py new file mode 100644 index 0000000..d540021 --- /dev/null +++ b/src/undate/converters/calendars/hebrew/converter.py @@ -0,0 +1,78 @@ +from typing import Union + +from convertdate import hebrew # type: ignore +from lark.exceptions import UnexpectedCharacters + +from undate import Undate, UndateInterval +from undate.converters.base import BaseCalendarConverter +from undate.converters.calendars.hebrew.parser import hebrew_parser +from undate.converters.calendars.hebrew.transformer import HebrewDateTransformer + + +class HebrewDateConverter(BaseCalendarConverter): + """ + Converter for Hebrew Anno Mundi calendar. + + Support for parsing Anno Mundi dates and converting to Undate and UndateInterval + objects in the Gregorian calendar. + """ + + #: converter name: Hebrew + name: str = "Hebrew" + calendar_name: str = "Anno Mundi" + + def __init__(self): + self.transformer = HebrewDateTransformer() + + def min_month(self) -> int: + """Smallest numeric month for this calendar.""" + return 1 + + def max_month(self, year: int) -> int: + """Maximum numeric month for this calendar. In Hebrew calendar, this is 12 or 13 + depending on whether it is a leap year.""" + return hebrew.year_months(year) + + def first_month(self) -> int: + """First month in this calendar. The Hebrew civil year starts in Tishri.""" + return hebrew.TISHRI + + def last_month(self, year: int) -> int: + """Last month in this calendar.
Hebrew civil year starts in Tishri, + Elul is the month before Tishri.""" + return hebrew.ELUL + + def max_day(self, year: int, month: int) -> int: + """maximum numeric day for the specified year and month in this calendar""" + # NOTE: unreleased v2.4.1 of convertdate standardizes month_days to month_length + return hebrew.month_days(year, month) + + def to_gregorian(self, year: int, month: int, day: int) -> tuple[int, int, int]: + """Convert a Hebrew date, specified by year, month, and day, + to the Gregorian equivalent date. Returns a tuple of year, month, day. + """ + return hebrew.to_gregorian(year, month, day) + + def parse(self, value: str) -> Union[Undate, UndateInterval]: + """ + Parse a Hebrew date string and return an :class:`~undate.undate.Undate` or + :class:`~undate.undate.UndateInterval`. + The Hebrew date string is preserved in the undate label. + """ + if not value: + raise ValueError("Parsing empty string is not supported") + + # parse the input string, then transform to undate object + try: + # parse the string with our Hebrew date parser + parsetree = hebrew_parser.parse(value) + # transform the parse tree into an undate or undate interval + undate_obj = self.transformer.transform(parsetree) + # set the original date as a label, with the calendar name + undate_obj.label = f"{value} {self.calendar_name}" + return undate_obj + except UnexpectedCharacters as err: + raise ValueError(f"Could not parse '{value}' as a Hebrew date") from err + + # do we need to support conversion the other direction? + # i.e., generate a Hebrew date from an arbitrary undate or undate interval?
diff --git a/src/undate/converters/calendars/hebrew/hebrew.lark b/src/undate/converters/calendars/hebrew/hebrew.lark new file mode 100644 index 0000000..b55ec3f --- /dev/null +++ b/src/undate/converters/calendars/hebrew/hebrew.lark @@ -0,0 +1,56 @@ +%import common.WS +%ignore WS + +// only support day month year format for now +// parser requires numeric day and year to be distinguished based on order +hebrew_date: day month year | month year | year + +// TODO: handle date ranges? + +// TODO: add support for qualifiers? +// PGP dates use qualifiers like "first decade of" (for beginning of month) +// "first third of", seasons (can look for more examples) + +// Hebrew calendar starts with year 1 in 3761 BCE +year: /\d+/ + +// months +month: month_1 + | month_2 + | month_3 + | month_4 + | month_5 + | month_6 + | month_7 + | month_8 + | month_9 + | month_10 + | month_11 + | month_12 + | month_13 +// months have 29 or 30 days; we do not expect leading zeroes +day: /[1-9]/ | /[12][0-9]/ | /30/ + +// months, in order; from convertdate list +// with variants from Princeton Geniza Project +// support matching with and without accents +month_1: "Nisan" +// Iyar or Iyyar +month_2: /Iyy?ar/ +month_3: "Sivan" +month_4: "Tammuz" +month_5: "Av" +month_6: "Elul" +// Tishrei or Tishri +month_7: /Tishre?i/ +month_8: "Heshvan" +month_9: "Kislev" +// Tevet or Teveth +month_10: /[ṬT]eveth?/ +month_11: "Shevat" +// Adar I or Adar +month_12: /Adar( I)?/ +// Adar II or Adar Bet +month_13: /Adar (II|Bet)/ + + diff --git a/src/undate/converters/calendars/hebrew/parser.py b/src/undate/converters/calendars/hebrew/parser.py new file mode 100644 index 0000000..5654f60 --- /dev/null +++ b/src/undate/converters/calendars/hebrew/parser.py @@ -0,0 +1,9 @@ +import pathlib + +from lark import Lark + +grammar_path = pathlib.Path(__file__).parent / "hebrew.lark" + +with open(grammar_path) as grammar: + # NOTE: LALR parser is faster but can't be used due to ambiguity between years and days + hebrew_parser
= Lark(grammar.read(), start="hebrew_date", strict=True) diff --git a/src/undate/converters/calendars/hebrew/transformer.py b/src/undate/converters/calendars/hebrew/transformer.py new file mode 100644 index 0000000..48e8b20 --- /dev/null +++ b/src/undate/converters/calendars/hebrew/transformer.py @@ -0,0 +1,40 @@ +from lark import Transformer, Tree + +from undate import Undate, Calendar + + +class HebrewUndate(Undate): + """Undate convenience subclass; sets default calendar to Hebrew.""" + + calendar = Calendar.HEBREW + + +class HebrewDateTransformer(Transformer): + """Transform a Hebrew date parse tree and return an Undate or + UndateInterval.""" + + def hebrew_date(self, items): + parts = {} + for child in items: + if child.data in ["year", "month", "day"]: + # in each case we expect one integer value; + # anonymous tokens convert to their value and cast as int + value = int(child.children[0]) + parts[str(child.data)] = value + + # initialize and return an undate with Hebrew year, month, day and + # Hebrew calendar + return HebrewUndate(**parts) + + # year translation is not needed since we want a tree with name year + # this is equivalent to a no-op + # def year(self, items): + # return Tree(data="year", children=[items[0]]) + + def month(self, items): + # month has a nested tree for the rule and the value + # the name of the rule (month_1, month_2, etc) gives us the + # number of the month needed for converting the date + tree = items[0] + month_n = tree.data.split("_")[-1] + return Tree(data="month", children=[month_n]) diff --git a/src/undate/converters/calendars/islamic/__init__.py b/src/undate/converters/calendars/islamic/__init__.py new file mode 100644 index 0000000..ffbb9d2 --- /dev/null +++ b/src/undate/converters/calendars/islamic/__init__.py @@ -0,0 +1,3 @@ +from undate.converters.calendars.islamic.converter import IslamicDateConverter + +__all__ = ["IslamicDateConverter"] diff --git a/src/undate/converters/calendars/islamic/converter.py
b/src/undate/converters/calendars/islamic/converter.py new file mode 100644 index 0000000..c658c90 --- /dev/null +++ b/src/undate/converters/calendars/islamic/converter.py @@ -0,0 +1,67 @@ +from typing import Union + +from convertdate import islamic # type: ignore +from lark.exceptions import UnexpectedCharacters + +from undate import Undate, UndateInterval +from undate.converters.base import BaseCalendarConverter +from undate.converters.calendars.islamic.parser import islamic_parser +from undate.converters.calendars.islamic.transformer import IslamicDateTransformer + + +class IslamicDateConverter(BaseCalendarConverter): + """ + Converter for Islamic Hijri calendar. + + Support for parsing Islamic Hijri dates and converting to Undate and UndateInterval + objects in the Gregorian calendar. + """ + + #: converter name: Islamic + name: str = "Islamic" + calendar_name: str = "Islamic" + + def __init__(self): + self.transformer = IslamicDateTransformer() + + def max_day(self, year: int, month: int) -> int: + """maximum numeric day for the specified year and month in this calendar""" + return islamic.month_length(year, month) + + def min_month(self) -> int: + """smallest numeric month for this calendar.""" + return 1 + + def max_month(self, year: int) -> int: + """maximum numeric month for this calendar""" + return 12 + + def to_gregorian(self, year: int, month: int, day: int) -> tuple[int, int, int]: + """Convert a Hijri date, specified by year, month, and day, + to the Gregorian equivalent date. Returns a tuple of year, month, day. + """ + return islamic.to_gregorian(year, month, day) + + def parse(self, value: str) -> Union[Undate, UndateInterval]: + """ + Parse an Islamic/Hijri date string and return an :class:`~undate.undate.Undate` or + :class:`~undate.undate.UndateInterval`. + The Islamic/Hijri date string is preserved in the undate label. 
+ """ + if not value: + raise ValueError("Parsing empty string is not supported") + + # parse the input string, then transform to undate object + try: + # parse the string with our Islamic Hijri date parser + parsetree = islamic_parser.parse(value) + # transform the parse tree into an undate or undate interval + undate_obj = self.transformer.transform(parsetree) + # set the original date as a label, with the calendar name + undate_obj.label = f"{value} {self.calendar_name}" + return undate_obj + except UnexpectedCharacters as err: + raise ValueError(f"Could not parse '{value}' as an Islamic date") from err + + # do we need to support conversion the other direction? + # i.e., generate an Islamic Hijri date from an arbitrary undate or undate interval? diff --git a/src/undate/converters/calendars/islamic/islamic.lark b/src/undate/converters/calendars/islamic/islamic.lark new file mode 100644 index 0000000..3ad59a5 --- /dev/null +++ b/src/undate/converters/calendars/islamic/islamic.lark @@ -0,0 +1,56 @@ +%import common.WS +%ignore WS + +// only support day month year format for now +// parser requires numeric day and year to be distinguished based on order +islamic_date: day month year | month year | year + +// TODO: handle date ranges? + +// TODO: add support for qualifiers? 
+// PGP dates use qualifiers like "first decade of" (for beginning of month) +// "first third of", seasons (can look for more examples) + +year: /\d+/ + +// months +month: month_1 + | month_2 + | month_3 + | month_4 + | month_5 + | month_6 + | month_7 + | month_8 + | month_9 + | month_10 + | month_11 + | month_12 +// months have 29 or 30 days; we do not expect leading zeroes +day: /[1-9]/ | /[12][0-9]/ | /30/ + +// months, in order; from convertdate list +// with variants from Princeton Geniza Project +// support matching with and without accents +// al-Muḥarram or Muḥarram +month_1: /(al-)?Mu[ḥh]arram/ +month_2: /[ṢS]afar/ +// Rabīʿ al-ʾAwwal or Rabi' I +month_3: /Rab[īi][ʿ'] (al-[`ʾ]Awwal|I)/ +// Rabīʿ ath-Thānī or Rabi' II +month_4: /Rab[īi][ʿ'] (ath-Th[āa]n[īi]|II)/ +// Jumādā al-ʾAwwal or Jumādā I +month_5: /Jum[āa]d[āa] (al-[ʾ`]Awwal|I)/ +// Jumādā ath-Thāniya or Jumādā II +month_6: /Jum[āa]d[āa] (ath-Th[āa]niyah|II)/ +month_7: "Rajab" +// Shaʿbān +month_8: /Sha[ʿ']b[āa]n/ +month_9: /Rama[ḍd][āa]n/ +month_10: /Shaww[āa]l/ +// Zū al-Qaʿdah or Dhu l-Qa'da +month_11: /(Z|Dh)[ūu] a?l-Qa[ʿ']dah?/ +// Zū al-Ḥijjah or Dhu l-Hijja +month_12: /(Z|Dh)[ūu] a?l-[HḤ]ijjah?/ + + diff --git a/src/undate/converters/calendars/islamic/parser.py b/src/undate/converters/calendars/islamic/parser.py new file mode 100644 index 0000000..b103711 --- /dev/null +++ b/src/undate/converters/calendars/islamic/parser.py @@ -0,0 +1,9 @@ +import pathlib + +from lark import Lark + +grammar_path = pathlib.Path(__file__).parent / "islamic.lark" + +with open(grammar_path) as grammar: + # NOTE: LALR parser is faster but can't be used due to ambiguity between years and days + islamic_parser = Lark(grammar.read(), start="islamic_date", strict=True) diff --git a/src/undate/converters/calendars/islamic/transformer.py b/src/undate/converters/calendars/islamic/transformer.py new file mode 100644 index 0000000..9ffce36 --- /dev/null +++ b/src/undate/converters/calendars/islamic/transformer.py @@ -0,0 
+1,40 @@ +from lark import Transformer, Tree + +from undate import Undate, Calendar + + +class IslamicUndate(Undate): + """Undate convenience subclass; sets default calendar to Islamic.""" + + calendar = Calendar.ISLAMIC + + +class IslamicDateTransformer(Transformer): + """Transform an Islamic Hijri date parse tree and return an Undate or + UndateInterval.""" + + def islamic_date(self, items): + parts = {} + for child in items: + if child.data in ["year", "month", "day"]: + # in each case we expect one integer value; + # anonymous tokens convert to their value and cast as int + value = int(child.children[0]) + parts[str(child.data)] = value + + # initialize and return an undate with islamic year, month, day and + # islamic calendar + return IslamicUndate(**parts) + + # year translation is not needed since we want a tree with name year + # this is equivalent to a no-op + # def year(self, items): + # return Tree(data="year", children=[items[0]]) + + def month(self, items): + # month has a nested tree for the rule and the value + # the name of the rule (month_1, month_2, etc) gives us the + # number of the month needed for converting the date + tree = items[0] + month_n = tree.data.split("_")[-1] + return Tree(data="month", children=[month_n]) diff --git a/src/undate/converters/edtf/converter.py b/src/undate/converters/edtf/converter.py index 95a1364..d0b742f 100644 --- a/src/undate/converters/edtf/converter.py +++ b/src/undate/converters/edtf/converter.py @@ -2,11 +2,12 @@ from lark.exceptions import UnexpectedCharacters +from undate import Undate, UndateInterval from undate.converters.base import BaseDateConverter from undate.converters.edtf.parser import edtf_parser from undate.converters.edtf.transformer import EDTFTransformer from undate.date import DatePrecision -from undate.undate import Undate, UndateInterval + #: character for unspecified digits EDTF_UNSPECIFIED_DIGIT: str = "X" diff --git a/src/undate/converters/edtf/edtf.lark
b/src/undate/converters/edtf/edtf.lark index e6f3a15..8587599 100644 --- a/src/undate/converters/edtf/edtf.lark +++ b/src/undate/converters/edtf/edtf.lark @@ -16,7 +16,7 @@ date: year | year "-" month | year "-" month "-" day year: /-?\d+/ month: /(0[1-9])|(1[0-2])/ -day: /([0-2][1-9])|(3[0-1])/ +day: /(0[1-9])|([12][0-9])|(3[01])/ timeinterval: date "/" date diff --git a/src/undate/converters/edtf/parser.py b/src/undate/converters/edtf/parser.py index 6ab5139..27c2bd6 100644 --- a/src/undate/converters/edtf/parser.py +++ b/src/undate/converters/edtf/parser.py @@ -1,45 +1,8 @@ -import os.path +import pathlib from lark import Lark -grammar_path = os.path.join(os.path.dirname(__file__), "edtf.lark") +grammar_path = pathlib.Path(__file__).parent / "edtf.lark" with open(grammar_path) as grammar: edtf_parser = Lark(grammar.read(), start="edtf") - - -# testcases = [ -# "1984", -# "1984-05", -# "1984-12", -# "1001-03-30", -# "1000/2000", -# "1000-01/2000-05-01", -# # level 1 -# "Y170000002", -# "2001-21", # spring 2001 -# # qualifiers -# "1984?", -# "2004-06~", -# "2004-06-11%", -# # unspecified digits from right -# "201X", -# "20XX", -# "2004-XX", -# "1985-04-XX", -# "1985-XX-XX", -# # open ended intervals -# "1985-04-12/..", -# "1985-04/..", -# "../1985-04-12", -# "/1985-04-12", -# "1984-13", -# ] - -# for testcase in testcases: -# print(f"\n{testcase}") -# tree = edtf_parser.parse(testcase) -# print(tree.pretty()) - - -# error_cases = ["1984-13", "Y1702"] diff --git a/src/undate/converters/edtf/transformer.py b/src/undate/converters/edtf/transformer.py index 135c93b..0b1de76 100644 --- a/src/undate/converters/edtf/transformer.py +++ b/src/undate/converters/edtf/transformer.py @@ -1,6 +1,6 @@ from lark import Token, Transformer, Tree -from undate.undate import Undate, UndateInterval +from undate import Undate, UndateInterval class EDTFTransformer(Transformer): @@ -54,24 +54,19 @@ def year_unspecified(self, items): return Tree(data="year", children=[value]) def 
month_unspecified(self, items): + # combine multiple parts into a single string value = "".join(self.get_values(items)) return Tree(data="month", children=[value]) def day_unspecified(self, items): + # combine multiple parts into a single string value = "".join(self.get_values(items)) return Tree(data="day", children=[value]) def date_level1(self, items): return self.date(items) - def year(self, items): - # when the year is negative, there are two tokens - if len(items) > 1 and items[0] == "-": - # an anonymous token for the - and the integer year - year = items[1] - return Tree(data="year", children=[-year]) - - return Tree(data="year", children=[items[0]]) + # year (including negative years) use default transformation def year_fivedigitsplus(self, items): # strip off the leading Y and convert to integer diff --git a/src/undate/converters/iso8601.py b/src/undate/converters/iso8601.py index a0ecad5..4f05b69 100644 --- a/src/undate/converters/iso8601.py +++ b/src/undate/converters/iso8601.py @@ -1,7 +1,7 @@ from typing import Dict, List, Union +from undate import Undate, UndateInterval from undate.converters.base import BaseDateConverter -from undate.undate import Undate, UndateInterval class ISO8601DateFormat(BaseDateConverter): @@ -77,19 +77,33 @@ def _undate_to_string(self, undate: Undate) -> str: # TODO: may want to refactor and take advantage of the year/month/day properties # added for use in EDTF formatter code for date_portion, iso_format in self.iso_format.items(): + # is known means fully known, means guaranteed integer if undate.is_known(date_portion): # NOTE: datetime strftime for %Y for 3-digit year # results in leading zero in some environments # and not others; force year to always be 4 digits - if date_portion == "year": - date_parts.append("%04d" % undate.earliest.year) - elif date_portion == "month" and undate.earliest.month: - date_parts.append("%02d" % undate.earliest.month) - elif date_portion == "day" and undate.earliest.day: - 
date_parts.append("%02d" % undate.earliest.day) # type: ignore + if date_portion == "year" and undate.year: + try: + date_parts.append("%04d" % int(undate.year)) + except ValueError: + # shouldn't happen because of is_known + date_parts.append(undate.year) + elif date_portion == "month" and undate.month: + try: + date_parts.append("%02d" % int(undate.month)) + except ValueError: + # shouldn't happen because of is_known + date_parts.append(undate.month) + elif date_portion == "day" and undate.day: + try: + date_parts.append("%02d" % int(undate.day)) + except ValueError: + # shouldn't happen because of is_known + date_parts.append(undate.day) elif date_portion == "year": - # if not known but this is year, add '-' for --MM-DD unknown year format + # if year is not known, add '-' for year portion, + # to generate --MM-DD unknown year format date_parts.append("-") # TODO: fix type error: "list[str | None]" is incompatible with "Iterable[str]" return "-".join(date_parts) # type: ignore diff --git a/src/undate/interval.py b/src/undate/interval.py new file mode 100644 index 0000000..ddfacdb --- /dev/null +++ b/src/undate/interval.py @@ -0,0 +1,183 @@ +# Pre 3.10 requires Union for multiple types, e.g. Union[int, None] instead of int | None +from typing import Optional, Union + + +from undate import Undate +from undate.date import ONE_DAY, ONE_YEAR, Timedelta +from undate.converters.base import BaseDateConverter + + +class UndateInterval: + """A date range between two uncertain dates. + + :param earliest: Earliest undate + :type earliest: `undate.Undate` + :param latest: Latest undate + :type latest: `undate.Undate` + :param label: A string to label a specific undate interval, similar to labels of `undate.Undate`.
+ :type label: `str` + """ + + # date range between two undates + earliest: Union[Undate, None] + latest: Union[Undate, None] + label: Union[str, None] + + # TODO: think about adding an optional precision / length / size field + # using DatePrecision for intervals of any standard duration (decade, century) + + def __init__( + self, + earliest: Optional[Undate] = None, + latest: Optional[Undate] = None, + label: Optional[str] = None, + ): + # takes two undate objects; allows conversion from supported types + if earliest: + try: + earliest = Undate.to_undate(earliest) + except TypeError as err: + raise ValueError( + f"earliest date {earliest} cannot be converted to Undate" + ) from err + if latest: + try: + latest = Undate.to_undate(latest) + except TypeError as err: + raise ValueError( + f"latest date {latest} cannot be converted to Undate" + ) from err + + # check that the interval is valid + if latest and earliest and latest <= earliest: + raise ValueError(f"invalid interval {earliest}-{latest}") + + self.earliest = earliest + self.latest = latest + self.label = label + + def __str__(self) -> str: + # using EDTF syntax for open ranges + return "%s/%s" % (self.earliest or "..", self.latest or "") + + def format(self, format) -> str: + """format this undate interval as a string using the specified format; + for now, only supports named converters""" + converter_cls = BaseDateConverter.available_converters().get(format, None) + if converter_cls: + return converter_cls().to_string(self) + + raise ValueError(f"Unsupported format '{format}'") + + def __repr__(self) -> str: + if self.label: + return "<UndateInterval '%s' (%s)>" % (self.label, self) + return "<UndateInterval %s>" % self + + def __eq__(self, other) -> bool: + # currently doesn't support comparison with any other types + if not isinstance(other, UndateInterval): + return NotImplemented + # consider interval equal if both dates are equal + return self.earliest == other.earliest and self.latest == other.latest + + def duration(self) -> Timedelta: +
"""Calculate the duration between two undates. + Note that durations are inclusive (i.e., a closed interval), and + include both the earliest and latest date rather than the difference + between them. + + :returns: A duration + :rtype: Timedelta + """ + # what is the duration of this date range? + + # if range is open-ended, can't calculate + if self.earliest is None or self.latest is None: + raise NotImplementedError( + "Cannot calculate duration for open-ended interval" + ) + + # if both years are known, subtract end of range from beginning of start + if self.latest.known_year and self.earliest.known_year: + return self.latest.latest - self.earliest.earliest + ONE_DAY + + # if neither year is known... + elif not self.latest.known_year and not self.earliest.known_year: + # under what circumstances can we assume that if both years + # are unknown the dates are in the same year or sequential? + duration = self.latest.earliest - self.earliest.earliest + # if we get a negative, we've wrapped from end of one year + # to the beginning of the next; + # recalculate assuming second date is in the subsequent year + if duration.days < 0: + end = self.latest.earliest + ONE_YEAR + duration = end - self.earliest.earliest + + # add the additional day *after* checking for a negative + # or after recalculating with adjusted year + duration += ONE_DAY + + return duration + + else: + # is there any meaningful way to calculate duration + # if one year is known and the other is not? + raise NotImplementedError + + def __contains__(self, other: object) -> bool: + """Determine if another interval or date falls within this + interval. 
Supports comparison with :class:`UndateInterval` + or anything that can be converted with :meth:`Undate.to_undate`.""" + # support comparison with another interval or anything + # that can be converted to an Undate + if isinstance(other, UndateInterval): + # compare based on earliest/latest bounds + other_earliest = other.earliest + other_latest = other.latest + else: + # otherwise, try to convert to an Undate + try: + other = Undate.to_undate(other) + other_latest = other_earliest = other + except TypeError: + # if conversion fails, then we don't support comparison + raise + + # if either bound of the current interval is None, + # then it is an open interval and we don't need to check the other value. + # if the other value is set, then check that it falls within the + # bounds of this interval + return ( + self.earliest is None + or other_earliest is not None + and other_earliest >= self.earliest + ) and ( + self.latest is None + or other_latest is not None + and other_latest <= self.latest + ) + + def intersection(self, other: "UndateInterval") -> Optional["UndateInterval"]: + """Determine the intersection or overlap between two :class:`UndateInterval` + objects and return a new interval. Returns None if there is no overlap. 
+ """ + try: + # when both values are defined, return the inner bounds; + # if not, return whichever is not None, or None + earliest = ( + max(self.earliest, other.earliest) + if self.earliest and other.earliest + else self.earliest or other.earliest + ) + latest = ( + min(self.latest, other.latest) + if self.latest and other.latest + else self.latest or other.latest + ) + + # if this results in an invalid interval, initialization + # will throw an exception + return UndateInterval(earliest, latest) + except ValueError: + return None diff --git a/src/undate/undate.py b/src/undate/undate.py index 7df7634..be4454a 100644 --- a/src/undate/undate.py +++ b/src/undate/undate.py @@ -1,12 +1,41 @@ +from __future__ import annotations + import datetime +from enum import auto + import re -from calendar import monthrange +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from undate.interval import UndateInterval + +try: + # StrEnum was only added in python 3.11 + from enum import StrEnum +except ImportError: + # for python 3.10 or earlier, use third-party package + from strenum import StrEnum # type: ignore # Pre 3.10 requires Union for multiple types, e.g. Union[int, None] instead of int | None from typing import Dict, Optional, Union from undate.converters.base import BaseDateConverter -from undate.date import ONE_DAY, ONE_MONTH_MAX, ONE_YEAR, Date, DatePrecision, Timedelta +from undate.date import ONE_DAY, ONE_MONTH_MAX, Date, DatePrecision, Timedelta + + +class Calendar(StrEnum): + """Supported calendars""" + + GREGORIAN = auto() + HEBREW = auto() + ISLAMIC = auto() + + @staticmethod + def get_converter(calendar): + # calendar converter must be available with a name matching + # the title-case name of the calendar enum entry + converter_cls = BaseDateConverter.available_converters()[calendar.value.title()] + return converter_cls() class Undate: @@ -25,9 +54,9 @@ class Undate: converter: BaseDateConverter #: precision of the date (day, month, year, etc.) 
precision: DatePrecision + #: the calendar this date is using; Gregorian by default + calendar: Calendar = Calendar.GREGORIAN - #: known non-leap year - NON_LEAP_YEAR: int = 2022 # numpy datetime is stored as 64-bit integer, so min/max # depends on the time unit; assume days for now # See https://numpy.org/doc/stable/reference/arrays.datetime.html#datetime-units @@ -43,7 +72,12 @@ def __init__( day: Optional[Union[int, str]] = None, converter: Optional[BaseDateConverter] = None, label: Optional[str] = None, + calendar: Optional[Union[str, Calendar]] = None, ): + # everything is optional but something is required + if all([val is None for val in [year, month, day]]): + raise ValueError("At least one of year, month, or day must be specified") + # keep track of initial values and which values are known # TODO: add validation: if str, must be expected length self.initial_values: Dict[str, Optional[Union[int, str]]] = { @@ -58,10 +92,25 @@ def __init__( elif year: self.precision = DatePrecision.YEAR + self.label = label + if calendar is not None: + self.set_calendar(calendar) + self.calendar_converter = Calendar.get_converter(self.calendar) + + self.calculate_earliest_latest(year, month, day) + + if converter is None: + # import all subclass definitions; initialize the default + converter_cls = BaseDateConverter.available_converters()[ + self.DEFAULT_CONVERTER + ] + converter = converter_cls() + self.converter = converter + + def calculate_earliest_latest(self, year, month, day): # special case: treat year = XXXX as unknown/none if year == "XXXX": year = None - if year is not None: # could we / should we use str.isnumeric here? try: @@ -81,32 +130,34 @@ def __init__( max_year = self.MAX_ALLOWABLE_YEAR # if month is passed in as a string but completely unknown, - # treat as none - # TODO: we should preserve this information somehow; - # difference between just a year and and an unknown month within a year - # maybe in terms of date precision ? 
+ # treat as unknown/none (date precision already set in init) if month == "XX": month = None - min_month = 1 - max_month = 12 + # get first and last month from the calendar (not always 1 and 12) + # as well as min/max months + earliest_month = self.calendar_converter.first_month() + latest_month = self.calendar_converter.last_month(max_year) + + min_month = self.calendar_converter.min_month() + max_month = self.calendar_converter.max_month(max_year) if month is not None: try: # treat as an integer if we can month = int(month) # update initial value self.initial_values["month"] = month - min_month = max_month = month + earliest_month = latest_month = month except ValueError: # if not, calculate min/max for missing digits - min_month, max_month = self._missing_digit_minmax( + earliest_month, latest_month = self._missing_digit_minmax( str(month), min_month, max_month ) - # similar to month above — unknown day, but day-level granularity if day == "XX": day = None + # if day is numeric, use as is if isinstance(day, int) or isinstance(day, str) and day.isnumeric(): day = int(day) # update initial value - fully known day @@ -114,39 +165,42 @@ def __init__( min_day = max_day = day else: # if we have no day or partial day, calculate min / max - min_day = 1 - # if we know year and month (or max month), calculate exactly - if year and month and isinstance(year, int): - _, max_day = monthrange(int(year), max_month) - elif year is None and month: - # If we don't have year and month, - # calculate based on a known non-leap year - # (better than just setting 31, but still not great) - _, max_day = monthrange(self.NON_LEAP_YEAR, max_month) - else: - max_day = 31 + min_day = 1 # is min day ever anything other than 1 ? 
+ rel_year = year if year and isinstance(year, int) else None + # use month if it is an integer; otherwise use previously determined + # max month (which may not be 12 depending if partially unknown) + rel_month = month if month and isinstance(month, int) else latest_month + + max_day = self.calendar_converter.max_day(rel_year, rel_month) # if day is partially specified, narrow min/max further if day is not None: min_day, max_day = self._missing_digit_minmax(day, min_day, max_day) # TODO: special case, if we get a Feb 29 date with unknown year, - # must switch the min/max years to known leap years! + # should switch the min/max years to known leap years! # for unknowns, assume smallest possible value for earliest and # largest valid for latest - self.earliest = Date(min_year, min_month, min_day) - self.latest = Date(max_year, max_month, max_day) - - if converter is None: - # import all subclass definitions; initialize the default - converter_cls = BaseDateConverter.available_converters()[ - self.DEFAULT_CONVERTER - ] - converter = converter_cls() - self.converter = converter + # convert to Gregorian calendar so earliest/latest can always + # be used for comparison + self.earliest = Date( + *self.calendar_converter.to_gregorian(min_year, earliest_month, min_day) + ) + self.latest = Date( + *self.calendar_converter.to_gregorian(max_year, latest_month, max_day) + ) - self.label = label + def set_calendar(self, calendar: Union[str, Calendar]): + if calendar is not None: + # if not passed as a Calendar instance, do a lookup + if not isinstance(calendar, Calendar): + # look for calendar by upper-case name + try: + calendar = Calendar[calendar.upper()] + except KeyError as err: + raise ValueError(f"Calendar `{calendar}` is not supported") from err + self.calendar = calendar def __str__(self) -> str: # if any portion of the date is partially known, construct @@ -170,12 +224,11 @@ def __str__(self) -> str: return self.converter.to_string(self) def __repr__(self) -> str: - if
self.label: - return "<Undate '%s' (%s)>" % (self.label, self) - return "<Undate %s>" % self + label_str = f" '{self.label}'" if self.label else "" + return f"<Undate{label_str} {self} ({self.calendar.title()})>" @classmethod - def parse(cls, date_string, format) -> Union["Undate", "UndateInterval"]: + def parse(cls, date_string, format) -> Union["Undate", UndateInterval]: """parse a string to an undate or undate interval using the specified format; for now, only supports named converters""" converter_cls = BaseDateConverter.available_converters().get(format, None) @@ -195,23 +248,19 @@ def format(self, format) -> str: raise ValueError(f"Unsupported format '{format}'") - def _comparison_type(self, other: object) -> "Undate": + @classmethod + def _comparison_type(cls, other: object) -> "Undate": """Common logic for type handling in comparison methods. Converts to Undate object if possible, otherwise raises - NotImplemented error. Currently only supports conversion - from :class:`datetime.date` + NotImplementedError exception. Uses :meth:`to_undate` for conversion. """ - - # support datetime.date by converting to undate - if isinstance(other, datetime.date): - other = Undate.from_datetime_date(other) - - # recommended to support comparison with arbitrary objects - if not isinstance(other, Undate): + # convert if possible; return NotImplemented if not + try: + return cls.to_undate(other) + except TypeError: + # recommended to support comparison with arbitrary objects return NotImplemented - return other - def __eq__(self, other: object) -> bool: # Note: assumes label differences don't matter for comparing dates @@ -221,13 +270,19 @@ def __eq__(self, other: object) -> bool: other = self._comparison_type(other) if other is NotImplemented: + # return NotImplemented to indicate comparison is not supported + # with this type return NotImplemented + # if both dates are fully known, then earliest/latest check + # is sufficient (and will work across calendars!)
+ # check for apparent equality + # - earliest/latest match and both have the same precision looks_equal = ( self.earliest == other.earliest and self.latest == other.latest - and self.initial_values == other.initial_values + and self.precision == other.precision ) # if everything looks the same, check for any unknowns in initial values # the same unknown date should NOT be considered equal @@ -237,7 +292,12 @@ def __eq__(self, other: object) -> bool: # in one format (i.e. X for missing digits). # If we support other formats, will need to normalize to common # internal format for comparison - if looks_equal and any("X" in str(val) for val in self.initial_values.values()): + if looks_equal and ( + # if any part of either date that is known is _partially_ known, + # then these dates are not equal + any([self.is_partially_known(p) for p in self.initial_values.keys()]) + or any([other.is_partially_known(p) for p in other.initial_values.keys()]) + ): return False return looks_equal @@ -301,10 +361,27 @@ def __contains__(self, other: object) -> bool: ] ) - @staticmethod - def from_datetime_date(dt_date: datetime.date): - """Initialize an :class:`Undate` object from a :class:`datetime.date`""" - return Undate(dt_date.year, dt_date.month, dt_date.day) + @classmethod + def to_undate(cls, other: object) -> "Undate": + """Convert arbitrary object to Undate, if possible. Raises TypeError + if conversion is not possible. 
+ + Currently supports: + - :class:`datetime.date` or :class:`datetime.datetime` + - :class:`undate.date.Date` + + """ + match other: + case Undate(): + return other + case datetime.date() | datetime.datetime(): + return Undate(other.year, other.month, other.day) + case Date(): + # handle conversion from internal Date class + return Undate(other.year, other.month, other.day) + + case _: + raise TypeError(f"Conversion from {type(other)} is not supported") @property def known_year(self) -> bool: @@ -366,7 +443,9 @@ def duration(self) -> Timedelta: """What is the duration of this date? Calculate based on earliest and latest date within range, taking into account the precision of the date even if not all - parts of the date are known.""" + parts of the date are known. Note that durations are inclusive + (i.e., a closed interval) and include both the earliest and latest + date rather than the difference between them.""" # if precision is a single day, duration is one day # no matter when it is or what else is known @@ -405,6 +484,8 @@ def _missing_digit_minmax( # given a possible range, calculate min/max values for a string # with a missing digit + # TODO: test this method directly + # assuming two digit only (i.e., month or day) possible_values = [f"{n:02}" for n in range(min_val, max_val + 1)] # ensure input value has two digits @@ -429,92 +510,3 @@ def _missing_digit_minmax( min_val = int("".join(new_min_val)) max_val = int("".join(new_max_val)) return (min_val, max_val) - - -class UndateInterval: - """A date range between two uncertain dates. - - :param earliest: Earliest undate - :type earliest: `undate.Undate` - :param latest: Latest undate - :type latest: `undate.Undate` - :param label: A string to label a specific undate interval, similar to labels of `undate.Undate`. 
- :type label: `str` - """ - - # date range between two uncertain dates - earliest: Union[Undate, None] - latest: Union[Undate, None] - label: Union[str, None] - - def __init__( - self, - earliest: Optional[Undate] = None, - latest: Optional[Undate] = None, - label: Optional[str] = None, - ): - # for now, assume takes two undate objects - self.earliest = earliest - self.latest = latest - self.label = label - - def __str__(self) -> str: - # using EDTF syntax for open ranges - return "%s/%s" % (self.earliest or "..", self.latest or "") - - def format(self, format) -> str: - """format this undate interval as a string using the specified format; - for now, only supports named converters""" - converter_cls = BaseDateConverter.available_converters().get(format, None) - if converter_cls: - return converter_cls().to_string(self) - - raise ValueError(f"Unsupported format '{format}'") - - def __repr__(self) -> str: - if self.label: - return "" % (self.label, self) - return "" % self - - def __eq__(self, other) -> bool: - # consider interval equal if both dates are equal - return self.earliest == other.earliest and self.latest == other.latest - - def duration(self) -> Timedelta: - """Calculate the duration between two undates. - - :returns: A duration - :rtype: Timedelta - """ - # what is the duration of this date range? - - # if range is open-ended, can't calculate - if self.earliest is None or self.latest is None: - return NotImplemented - - # if both years are known, subtract end of range from beginning of start - if self.latest.known_year and self.earliest.known_year: - return self.latest.latest - self.earliest.earliest + ONE_DAY - - # if neither year is known... - elif not self.latest.known_year and not self.earliest.known_year: - # under what circumstances can we assume that if both years - # are unknown the dates are in the same year or sequential? 
- duration = self.latest.earliest - self.earliest.earliest - # if we get a negative, we've wrapped from end of one year - # to the beginning of the next; - # recalculate assuming second date is in the subsequent year - if duration.days < 0: - end = self.latest.earliest + ONE_YEAR - duration = end - self.earliest.earliest - - # add the additional day *after* checking for a negative - # or after recalculating with adjusted year - duration += ONE_DAY - - return duration - - else: - # is there any meaningful way to calculate duration - # if one year is known and the other is not? - raise NotImplementedError diff --git a/tests/test_converters/edtf/test_edtf_parser.py b/tests/test_converters/edtf/test_edtf_parser.py index e9a3fdb..73d4e02 100644 --- a/tests/test_converters/edtf/test_edtf_parser.py +++ b/tests/test_converters/edtf/test_edtf_parser.py @@ -8,6 +8,7 @@ "1984-05", "1984-12", "1001-03-30", + "1901-02-20", "1000/2000", "1000-01/2000-05-01", # level 1 @@ -45,7 +46,7 @@ def test_should_parse(date_string): assert edtf_parser.parse(date_string) -error_cases = ["1984-13", "Y1702"] +error_cases = ["1984-13", "Y1702", "1984-00", "1984-01-00"] @pytest.mark.parametrize("date_string", error_cases) diff --git a/tests/test_converters/edtf/test_edtf_transformer.py b/tests/test_converters/edtf/test_edtf_transformer.py index 66488f6..3e82eb1 100644 --- a/tests/test_converters/edtf/test_edtf_transformer.py +++ b/tests/test_converters/edtf/test_edtf_transformer.py @@ -1,7 +1,8 @@ import pytest + +from undate import Undate, UndateInterval from undate.converters.edtf.parser import edtf_parser from undate.converters.edtf.transformer import EDTFTransformer -from undate.undate import Undate, UndateInterval # for now, just test that valid dates can be parsed diff --git a/tests/test_converters/test_base.py b/tests/test_converters/test_base.py index 60d5d1e..a4ac52d 100644 --- a/tests/test_converters/test_base.py +++ b/tests/test_converters/test_base.py @@ -1,7 +1,12 @@ import logging 
import pytest -from undate.converters.base import BaseDateConverter +from undate.converters.base import BaseDateConverter, BaseCalendarConverter +from undate.converters.calendars import ( + GregorianDateConverter, + HebrewDateConverter, + IslamicDateConverter, +) class TestBaseDateConverter: @@ -18,7 +23,7 @@ def test_available_converters(self): def test_converters_are_unique(self): assert len(BaseDateConverter.available_converters()) == len( - BaseDateConverter.__subclasses__() + BaseDateConverter.subclasses() ), "Formatter names have to be unique." def test_parse_not_implemented(self): @@ -29,6 +34,18 @@ def test_parse_to_string(self): with pytest.raises(NotImplementedError): BaseDateConverter().to_string(1991) + def test_subclasses(self): + # define a nested subclass + class SubSubConverter(IslamicDateConverter): + pass + + subclasses = BaseDateConverter.subclasses() + assert BaseCalendarConverter not in subclasses + assert IslamicDateConverter in subclasses + assert HebrewDateConverter in subclasses + assert GregorianDateConverter in subclasses + assert SubSubConverter in subclasses + def test_import_converters_import_only_once(caplog): # clear the cache, since any instantiation of an Undate @@ -60,5 +77,17 @@ class ISO8601DateFormat2(BaseDateConverter): name = "ISO8601" # duplicates existing formatter assert len(BaseDateConverter.available_converters()) != len( - BaseDateConverter.__subclasses__() + BaseDateConverter.subclasses() ) + + +class TestBaseCalendarConverter: + def test_not_implemented(self): + with pytest.raises(NotImplementedError): + BaseCalendarConverter().min_month() + with pytest.raises(NotImplementedError): + BaseCalendarConverter().max_month(1900) + with pytest.raises(NotImplementedError): + BaseCalendarConverter().max_day(1900, 12) + with pytest.raises(NotImplementedError): + BaseCalendarConverter().to_gregorian(1900, 12, 31) diff --git a/tests/test_converters/test_calendars/test_hebrew/test_hebrew_converter.py 
b/tests/test_converters/test_calendars/test_hebrew/test_hebrew_converter.py new file mode 100644 index 0000000..c3c8b7c --- /dev/null +++ b/tests/test_converters/test_calendars/test_hebrew/test_hebrew_converter.py @@ -0,0 +1,155 @@ +import pytest + +from undate.converters.calendars import HebrewDateConverter +from undate.converters.calendars.hebrew.transformer import HebrewUndate +from undate.undate import Calendar, Undate +from undate.date import DatePrecision, Date + + +class TestHebrewDateConverter: + def test_parse(self): + # day + # 26 Tammuz 4816: Tammuz = month 4 (17 July, 1056 Gregorian) + date_str = "26 Tammuz 4816" + date = HebrewDateConverter().parse(date_str) + assert date == HebrewUndate(4816, 4, 26) + assert date.calendar == Calendar.HEBREW + assert date.precision == DatePrecision.DAY + assert date.label == f"{date_str} {HebrewDateConverter.calendar_name}" + + # month + date_str = "Ṭevet 5362" + date = HebrewDateConverter().parse(date_str) + assert date == HebrewUndate(5362, 10) # Teveth = month 10 + assert date.calendar == Calendar.HEBREW + assert date.precision == DatePrecision.MONTH + assert date.label == f"{date_str} {HebrewDateConverter.calendar_name}" + + # year + date_str = "4932" + date = HebrewDateConverter().parse(date_str) + assert date == HebrewUndate(4932) + assert date.calendar == Calendar.HEBREW + assert date.precision == DatePrecision.YEAR + assert date.label == f"{date_str} {HebrewDateConverter.calendar_name}" + + def test_gregorian_earliest_latest(self): + # earliest/latest should be converted to Gregorian for comparison + + # full date + + # 26 Tammuz 4816: 17 July, 1056; Tammuz = month 4 + date = HebrewUndate(4816, 4, 26) + assert date.earliest == Date(1056, 7, 17) + assert date.latest == Date(1056, 7, 17) + # 13 Tishrei 5416 Anno Mundi (1655-10-14) + date = HebrewUndate(5416, 7, 13) # Tishrei = month 7 + assert date.earliest == Date(1655, 10, 14) + assert date.latest == Date(1655, 10, 14) + + # month + + # Ṭevet 5362 Anno Mundi 
(25 December, 1601 – 22 January, 1602) + date = HebrewUndate(5362, 10) + assert date.earliest == Date(1601, 12, 25) + assert date.latest == Date(1602, 1, 22) + + # year + # 5416 : October 1655 to September 1656 + date = HebrewUndate(5416) + assert date.earliest == Date(1655, 10, 2) + assert date.latest == Date(1656, 9, 18) + + def test_parse_error(self): + # a string we can't parse should raise an error + with pytest.raises(ValueError): + HebrewDateConverter().parse("January 2, 1991") + # empty string should also error + with pytest.raises(ValueError): + HebrewDateConverter().parse("") + + # non-string input should raise a type error + with pytest.raises(TypeError): + HebrewDateConverter().parse(42) + + with pytest.raises(TypeError): + HebrewDateConverter().parse({"foo": "bar"}) + + def test_partially_known(self): + # hebrew dates get existing partially unknown behavior + + converter = HebrewDateConverter() + + # hebrew first/last month are not the same as min/max + unknown_month = HebrewUndate(1243, "XX") + assert unknown_month.precision == DatePrecision.MONTH + assert unknown_month.earliest == Date( + *converter.to_gregorian(1243, converter.first_month(), 1) + ) + last_month = converter.last_month(year=1243) + assert unknown_month.latest == Date( + *converter.to_gregorian( + 1243, last_month, converter.max_day(1243, last_month) + ) + ) + + partially_unknown_month = HebrewUndate(1243, "1X") + assert partially_unknown_month.precision == DatePrecision.MONTH + assert partially_unknown_month.earliest == Date( + *converter.to_gregorian(1243, 10, 1) + ) + # for unknown digit, assume largest possible value instead + # of last semantic month in the year + last_month = converter.max_month(year=1243) + last_day = converter.max_day(1243, last_month) + assert partially_unknown_month.latest == Date( + *converter.to_gregorian(1243, last_month, last_day) + ) + + # second month has 29 days + unknown_day = HebrewUndate(1243, 2, "XX") + assert unknown_day.precision ==
DatePrecision.DAY + assert unknown_day.earliest == Date(*converter.to_gregorian(1243, 2, 1)) + assert unknown_day.latest == Date(*converter.to_gregorian(1243, 2, 29)) + + partially_unknown_day = HebrewUndate(1243, 2, "2X") + assert partially_unknown_day.precision == DatePrecision.DAY + assert partially_unknown_day.earliest == Date( + *converter.to_gregorian(1243, 2, 20) + ) + assert partially_unknown_day.latest == Date( + *converter.to_gregorian(1243, 2, 29) + ) + + def test_compare_across_calendars(self): + # only day-precision dates can be exactly equal across calendars + + # 26 Tammuz 4816: Tammuz = month 4 (17 July, 1056 Gregorian) + assert HebrewUndate(4816, 4, 26) == Undate(1056, 7, 17) + # 13 Tishrei 5416; Tishrei = month 7 (1655-10-14) + assert HebrewUndate(5416, 7, 13) == Undate(1655, 10, 14) + + # greater than / less than + assert HebrewUndate(4816) < Undate(1060) + assert HebrewUndate(5416) < Undate(1660) + assert HebrewUndate(5416, 7) > Undate(1655, 1) + assert HebrewUndate(4816, 4, 26) > Undate(1055, 5) + + # 26 Tammuz 4816: Tammuz = month 4 (17 July, 1056) + # so it falls within or is contained by July 1056 + assert HebrewUndate(4816, 4, 26) in Undate(1056, 7) + assert HebrewUndate(4816, 4, 26) not in Undate(1054) + + # sorting + sorted_dates = sorted( + [ + HebrewUndate(4816, 4, 26), # 1056-07-17 + HebrewUndate(5416), # 1655 + HebrewUndate(500), # -3261 + Undate(1995), + Undate(33), + Undate(1350), + ] + ) + expected_gregorian_years = [-3261, 33, 1056, 1350, 1655, 1995] + assert [d.earliest.year for d in sorted_dates] == expected_gregorian_years diff --git a/tests/test_converters/test_calendars/test_hebrew/test_hebrew_parser.py b/tests/test_converters/test_calendars/test_hebrew/test_hebrew_parser.py new file mode 100644 index 0000000..69b929e --- /dev/null +++ b/tests/test_converters/test_calendars/test_hebrew/test_hebrew_parser.py @@ -0,0 +1,65 @@ +import pytest +from lark.exceptions import UnexpectedCharacters, UnexpectedEOF + +from
undate.converters.calendars.hebrew.parser import hebrew_parser + + +# for now, just test that valid dates can be parsed + +testcases = [ + # year + "5362", + # month + year + # - with and without accent + "Ṭevet 5362", + "Tevet 5362", + "Elul 4932", + "Sivan 5581", + # variant month name, with or without accent + "Ṭeveth 5362", + "Teveth 5362", + "Iyyar 1526", + "Iyar 1526", + # day month year + "26 Tammuz 4816", + "7 Heshvan 5425", + "26 Tishrei 5416", + "26 Tishri 5416", + "14 Adar 5403", + "14 Adar I 5403", + "9 Adar II 5404", + "9 Adar Bet 5404", + # two and 1 digit years + "536", + "53", + "3", +] + + +@pytest.mark.parametrize("date_string", testcases) +def test_should_parse(date_string): + assert hebrew_parser.parse(date_string) + + +error_cases = [ + # invalid days + ("0 Tammuz 5403", UnexpectedCharacters), + ("31 Tishri 5403", UnexpectedCharacters), + # month alone + ("Tishri", UnexpectedEOF), + # month day only + ("12 Heshvan", UnexpectedEOF), + # invalid month + ("Foo 383", UnexpectedCharacters), + # wrong format + ("2024-10-02", UnexpectedCharacters), + # year month day not supported + ("5403 Adar", UnexpectedCharacters), + ("5403 Adar 14", UnexpectedCharacters), +] + + +@pytest.mark.parametrize("date_string,exception", error_cases) +def test_should_error(date_string, exception): + with pytest.raises(exception): + hebrew_parser.parse(date_string) diff --git a/tests/test_converters/test_calendars/test_hebrew/test_hebrew_transformer.py b/tests/test_converters/test_calendars/test_hebrew/test_hebrew_transformer.py new file mode 100644 index 0000000..6e4a5e6 --- /dev/null +++ b/tests/test_converters/test_calendars/test_hebrew/test_hebrew_transformer.py @@ -0,0 +1,43 @@ +import pytest +from undate.converters.calendars.hebrew.parser import hebrew_parser +from undate.converters.calendars.hebrew.transformer import ( + HebrewDateTransformer, + HebrewUndate, +) +from undate.undate import Undate, Calendar +from undate.date import DatePrecision + + +def 
test_hebrew_undate(): + assert HebrewUndate(848).calendar == Calendar.HEBREW + + +testcases = [ + # examples from Princeton Geniza Project + # date conversions checked with https://www.muqawwim.com/ + # 26 Tammuz 4816; Tammuz = month 4 + ("26 Tammuz 4816", HebrewUndate(4816, 4, 26), DatePrecision.DAY), + ("Tammuz 4816", HebrewUndate(4816, 4), DatePrecision.MONTH), + ("4816", HebrewUndate(4816), DatePrecision.YEAR), + # 26 Tishrei 5416: Tishrei = month 7 + ("26 Tishrei 5416", HebrewUndate(5416, 7, 26), DatePrecision.DAY), + # Ṭeveth = month 10 + ("Ṭevet 5362", HebrewUndate(5362, 10), DatePrecision.MONTH), + ("5362", HebrewUndate(5362), DatePrecision.YEAR), + # add when we support parsing ranges: + # Adar I and Adar II 5453 : (1693 CE) +] + + +@pytest.mark.parametrize("date_string,expected,expected_precision", testcases) +def test_transform(date_string, expected, expected_precision): + transformer = HebrewDateTransformer(visit_tokens=True) + # parse the input string, then transform to undate object + parsetree = hebrew_parser.parse(date_string) + transformed_date = transformer.transform(parsetree) + assert transformed_date == expected + # currently only undates have date precision + if isinstance(transformed_date, Undate): + assert transformed_date.precision == expected_precision + # transformer doesn't have access to date string, + # label will need to be set by the converter class diff --git a/tests/test_converters/test_calendars/test_islamic/test_islamic_converter.py b/tests/test_converters/test_calendars/test_islamic/test_islamic_converter.py new file mode 100644 index 0000000..4acacd0 --- /dev/null +++ b/tests/test_converters/test_calendars/test_islamic/test_islamic_converter.py @@ -0,0 +1,154 @@ +import pytest + +from undate.converters.calendars import IslamicDateConverter +from undate.converters.calendars.islamic.transformer import IslamicUndate +from undate.undate import Calendar, Undate +from undate.date import DatePrecision, Date + + +class 
TestIslamicDateConverter: + def test_parse(self): + # day + # Monday, 7 Jumādā I 1243 Hijrī (26 November, 1827 CE); Jumada I = month 5 + date_str = "7 Jumādā I 1243" + date = IslamicDateConverter().parse(date_str) + assert date == IslamicUndate(1243, 5, 7) + assert date.calendar == Calendar.ISLAMIC + assert date.precision == DatePrecision.DAY + assert date.label == f"{date_str} {IslamicDateConverter.calendar_name}" + + # month + date_str = "Rajab 495" + date = IslamicDateConverter().parse(date_str) + assert date == IslamicUndate(495, 7) # Rajab is month 7 + assert date.calendar == Calendar.ISLAMIC + assert date.precision == DatePrecision.MONTH + assert date.label == f"{date_str} {IslamicDateConverter.calendar_name}" + # Gregorian earliest/ latest + assert date.earliest == Date(1102, 4, 28) + assert date.latest == Date(1102, 5, 27) + + # year + date_str = "441" + date = IslamicDateConverter().parse(date_str) + assert date == IslamicUndate(441) + assert date.calendar == Calendar.ISLAMIC + assert date.precision == DatePrecision.YEAR + assert date.label == f"{date_str} {IslamicDateConverter.calendar_name}" + # Gregorian earliest/ latest + assert date.earliest == Date(1049, 6, 11) + assert date.latest == Date(1050, 5, 31) + + def test_gregorian_earliest_latest(self): + # earliest/latest should be converted to Gregorian for comparison + + # Monday, 7 Jumādā I 1243 Hijrī (26 November, 1827 CE); Jumada I = month 5 + date = IslamicUndate(1243, 5, 7) + assert date.earliest == Date(1827, 11, 26) + assert date.latest == Date(1827, 11, 26) + + # Jumādā I 1243 : 1827-11-20 to 1827-12-19 + date = IslamicUndate(1243, 5) + assert date.earliest == Date(1827, 11, 20) + assert date.latest == Date(1827, 12, 19) + + # Rajab 495: 1102-04-28 to 1102-05-27 (Rajab = month 7) + date = IslamicUndate(495, 7) + assert date.earliest == Date(1102, 4, 28) + assert date.latest == Date(1102, 5, 27) + + # 441 : 1049-06-11 to 1050-05-31 + date = IslamicUndate(441) + assert date.earliest == Date(1049, 
6, 11) + assert date.latest == Date(1050, 5, 31) + + # examples from ISMI data (reformatted to day month year) + # 14 Rabīʿ I 901 : 1495-12-11 (Rabi 1 = month 3 ) + date = IslamicUndate(901, 3, 14) + assert date.earliest == Date(1495, 12, 11) + assert date.latest == Date(1495, 12, 11) + + # 884 : 1479-04-03 to 1480-03-21 + date = IslamicUndate(884) + assert date.earliest == Date(1479, 4, 3) + assert date.latest == Date(1480, 3, 21) + + def test_parse_error(self): + # a string we can't parse should raise an error + with pytest.raises(ValueError): + IslamicDateConverter().parse("January 2, 1991") + # empty string should also error + with pytest.raises(ValueError): + IslamicDateConverter().parse("") + + def test_partially_known(self): + # hijri dates get existing partially unknown behavior + unknown_month = IslamicUndate(1243, "XX") + assert unknown_month.precision == DatePrecision.MONTH + assert unknown_month.earliest == Date( + *IslamicDateConverter().to_gregorian(1243, 1, 1) + ) + assert unknown_month.latest == Date( + *IslamicDateConverter().to_gregorian(1243, 12, 30) + ) + + partially_unknown_month = IslamicUndate(1243, "1X") + assert partially_unknown_month.precision == DatePrecision.MONTH + assert partially_unknown_month.earliest == Date( + *IslamicDateConverter().to_gregorian(1243, 10, 1) + ) + assert partially_unknown_month.latest == Date( + *IslamicDateConverter().to_gregorian(1243, 12, 30) + ) + + unknown_day = IslamicUndate(1243, 2, "XX") + assert unknown_day.precision == DatePrecision.DAY + assert unknown_day.earliest == Date( + *IslamicDateConverter().to_gregorian(1243, 2, 1) + ) + # second month has 29 days + assert unknown_day.latest == Date( + *IslamicDateConverter().to_gregorian(1243, 2, 29) + ) + partially_unknown_day = IslamicUndate(1243, 2, "2X") + assert partially_unknown_day.precision == DatePrecision.DAY + assert partially_unknown_day.earliest == Date( + *IslamicDateConverter().to_gregorian(1243, 2, 20) + ) + assert partially_unknown_day.latest 
== Date( + *IslamicDateConverter().to_gregorian(1243, 2, 29) + ) + + def test_compare_across_calendars(self): + # only day-precision dates can be exactly equal across calendars + + # 7 Jumādā I 1243 Hijrī : 26 November, 1827; Jumada I = month 5 + assert IslamicUndate(1243, 5, 7) == Undate(1827, 11, 26) + # 14 Rabīʿ I 901 : 1495-12-11 (Rabi 1 = month 3 ) + assert IslamicUndate(901, 3, 14) == Undate(1495, 12, 11) + + # greater than / less than + assert IslamicUndate(901) < Undate(1500) + assert IslamicUndate(901) > Undate(1450) + # Jumādā I 1243 : 1827-11-20 to 1827-12-19 + assert IslamicUndate(1243, 5) > Undate(1827, 10) + assert IslamicUndate(1243, 5) < Undate(1828, 1) + + # 7 Jumādā I 1243 Hijrī : 26 November, 1827, so it falls + # within (or is contained by) November 1827 + assert IslamicUndate(1243, 5, 7) in Undate(1827, 11) + assert IslamicUndate(1243, 5, 7) not in Undate(1827, 10) + + # sorting + sorted_dates = sorted( + [ + IslamicUndate(884), # 1479 to 1480 Gregorian + IslamicUndate(441), # 1049 to 1050 Gregorian + IslamicUndate(901), # 1495 to 1496 Gregorian + Undate(1995), + Undate(33), + Undate(1350), + ] + ) + expected_gregorian_years = [33, 1049, 1350, 1479, 1495, 1995] + assert [d.earliest.year for d in sorted_dates] == expected_gregorian_years diff --git a/tests/test_converters/test_calendars/test_islamic/test_islamic_parser.py b/tests/test_converters/test_calendars/test_islamic/test_islamic_parser.py new file mode 100644 index 0000000..de4901e --- /dev/null +++ b/tests/test_converters/test_calendars/test_islamic/test_islamic_parser.py @@ -0,0 +1,78 @@ +import pytest +from lark.exceptions import LarkError + +from undate.converters.calendars.islamic.parser import islamic_parser + + +# for now, just test that valid dates can be parsed + +testcases = [ + # year + "521", + # month + year + # - with and without accent + "al-Muḥarram 900", + "al-Muharram 900", + "Safar 581", + "Ṣafar 581", + # variant month name, with or without accent + "Muharram 900", +
"Muḥarram 900", + "Rabīʿ al-ʾAwwal 901", + "Rabi' I 901", + "Rabīʿ ath-Thānī 343", + "Rabīʿ II 343", + "Jumādā al-ʾAwwal 1081", + "Jumada al-`Awwal 1081", + "Jumādā I 1081", + "Jumādā ath-Thāniyah 901", + "Jumada ath-Thaniyah 901", + "Jumādā II 981", + "Rajab 942", + "Shaʿbān 900", + "Sha'ban 900", + "Ramaḍān 903", + "Ramadan 903", + "Shawwāl 1042", + "Shawwal 1042", + "Zū al-Qaʿdah 124", + "Dhu l-Qa'da 124", + # day month year + "7 Jumādā I 1243", + "29 Muḥarram 1243", + "30 Muḥarram 1243", + "Rabīʿ I 901", + "12 Rabīʿ I 901", + # two and 1 digit years + "12 Rabīʿ I 90", + "12 Rabīʿ I 9", +] + + +@pytest.mark.parametrize("date_string", testcases) +def test_should_parse(date_string): + assert islamic_parser.parse(date_string) + + +error_cases = [ + # invalid days + "0 Muḥarram 1243", + "31 Muḥarram 1243", + # month alone + "Shawwal", + # month day only + "12 Shawwal", + # invalid month + "Foo 383", + # wrong format + "2024-10-02", + # year month day not supported + "901 Rabīʿ I", + "901 Rabīʿ I 12", +] + + +@pytest.mark.parametrize("date_string", error_cases) +def test_should_error(date_string): + with pytest.raises(LarkError): + islamic_parser.parse(date_string) diff --git a/tests/test_converters/test_calendars/test_islamic/test_islamic_transformer.py b/tests/test_converters/test_calendars/test_islamic/test_islamic_transformer.py new file mode 100644 index 0000000..951a9f8 --- /dev/null +++ b/tests/test_converters/test_calendars/test_islamic/test_islamic_transformer.py @@ -0,0 +1,49 @@ +import pytest +from undate.converters.calendars.islamic.parser import islamic_parser +from undate.converters.calendars.islamic.transformer import ( + IslamicDateTransformer, + IslamicUndate, +) +from undate.undate import Undate, Calendar +from undate.date import DatePrecision + + +def test_islamic_undate(): + assert IslamicUndate(848).calendar == Calendar.ISLAMIC + + +testcases = [ + # examples from Princeton Geniza Project + # date conversions checked with 
https://www.muqawwim.com/ + # Monday, 7 Jumādā I 1243 Hijrī (26 November, 1827 CE); Jumada I = month 5 + ("7 Jumādā I 1243", IslamicUndate(1243, 5, 7), DatePrecision.DAY), + ("Jumādā I 1243", IslamicUndate(1243, 5), DatePrecision.MONTH), + ("1243", IslamicUndate(1243), DatePrecision.YEAR), + # Gregorian: UndateInterval(Undate(1827, 7, 25), Undate(1828, 7, 13)), + # Zū al-Qaʿdah / Dhu l-Qa'da = month 11 + ("27 Dhū l-Qaʿda 632", IslamicUndate(632, 11, 27), DatePrecision.DAY), + # Rajab = month 7 + ("Rajab 495", IslamicUndate(495, 7), DatePrecision.MONTH), + ("441", IslamicUndate(441), DatePrecision.YEAR), + # examples from ISMI data (reformatted to day month year) + # Rabi 1 = month 3 + ("14 Rabīʿ I 901", IslamicUndate(901, 3, 14), DatePrecision.DAY), + ("884", IslamicUndate(884), DatePrecision.YEAR), + # Gregorian: UndateInterval(Undate(1479, 4, 3), Undate(1480, 3, 21)), + # add when we support parsing ranges: + # 900 Muḥarram 1 - 999 Ḏu al-Ḥijjaẗ 29 : 1494-10-11 to 1591-10-18 +] + + +@pytest.mark.parametrize("date_string,expected,expected_precision", testcases) +def test_transform(date_string, expected, expected_precision): + transformer = IslamicDateTransformer(visit_tokens=True) + # parse the input string, then transform to undate object + parsetree = islamic_parser.parse(date_string) + transformed_date = transformer.transform(parsetree) + assert transformed_date == expected + # currently only undates have date precision + if isinstance(transformed_date, Undate): + assert transformed_date.precision == expected_precision + # transformer doesn't have access to date string, + # label will need to be set by the converter class diff --git a/tests/test_converters/test_edtf.py b/tests/test_converters/test_edtf.py index f159970..3262e46 100644 --- a/tests/test_converters/test_edtf.py +++ b/tests/test_converters/test_edtf.py @@ -1,7 +1,6 @@ import pytest from undate.converters.edtf import EDTFDateConverter -from undate.date import DatePrecision -from undate.undate import 
Undate, UndateInterval +from undate import Undate, UndateInterval class TestEDTFDateConverter: @@ -64,8 +63,8 @@ def test_to_string(self): # if converter can't generate a string for the date, # it should return a value error - empty_undate = Undate() - empty_undate.precision = DatePrecision.DECADE - with pytest.raises(ValueError): - EDTFDateConverter().to_string(empty_undate) + # empty_undate = Undate() # undate with no date information no longer supported + # empty_undate.precision = DatePrecision.DECADE + # with pytest.raises(ValueError): + # EDTFDateConverter().to_string(empty_undate) # TODO: override missing digit and confirm replacement diff --git a/tests/test_converters/test_iso8601.py b/tests/test_converters/test_iso8601.py index 73f645e..519eeb2 100644 --- a/tests/test_converters/test_iso8601.py +++ b/tests/test_converters/test_iso8601.py @@ -1,5 +1,5 @@ +from undate import Undate, UndateInterval from undate.converters.iso8601 import ISO8601DateFormat -from undate.undate import Undate, UndateInterval class TestISO8601DateFormat: diff --git a/tests/test_interval.py b/tests/test_interval.py new file mode 100644 index 0000000..cf1a716 --- /dev/null +++ b/tests/test_interval.py @@ -0,0 +1,236 @@ +import calendar +import datetime + +import pytest + +from undate import Undate, UndateInterval +from undate.date import Timedelta + + +class TestUndateInterval: + def test_init_types(self): + # datetime.date - autoconvert + interval = UndateInterval(datetime.date(2022, 1, 1), None) + assert isinstance(interval.earliest, Undate) + interval = UndateInterval(None, datetime.date(2022, 1, 1)) + assert isinstance(interval.latest, Undate) + + # unsupported type should raise exception + with pytest.raises( + ValueError, match="earliest date 2022 cannot be converted to Undate" + ): + UndateInterval(2022, None) + + with pytest.raises( + ValueError, match="latest date 1982 cannot be converted to Undate" + ): + UndateInterval(None, "1982") + + def test_init_validation(self): + 
with pytest.raises(ValueError, match="invalid interval"): + UndateInterval(Undate(2020), Undate(1010)) + + def test_str(self): + # 2022 - 2023 + assert str(UndateInterval(Undate(2022), Undate(2023))) == "2022/2023" + # 2022 - 2023-05 + assert str(UndateInterval(Undate(2022), Undate(2023, 5))) == "2022/2023-05" + # 2022-11-01 to 2023-11-07 + assert ( + str(UndateInterval(Undate(2022, 11, 1), Undate(2023, 11, 7))) + == "2022-11-01/2023-11-07" + ) + + def test_format(self): + interval = UndateInterval(Undate(2000), Undate(2001)) + assert interval.format("EDTF") == "2000/2001" + assert interval.format("ISO8601") == "2000/2001" + + # Open-ended intervals + open_start = UndateInterval(latest=Undate(2000)) + assert open_start.format("EDTF") == "../2000" + assert open_start.format("ISO8601") == "/2000" + + open_end = UndateInterval(earliest=Undate(2000)) + assert open_end.format("EDTF") == "2000/.." + assert open_end.format("ISO8601") == "2000/" + + def test_repr(self): + assert ( + repr(UndateInterval(Undate(2022), Undate(2023))) + == "" + ) + assert ( + repr(UndateInterval(Undate(2022), Undate(2023), label="Fancy Epoch")) + == "" + ) + + def test_str_open_range(self): + # 900 - + assert str(UndateInterval(Undate(900))) == "0900/" + # - 1900 + assert str(UndateInterval(latest=Undate(1900))) == "../1900" + # - 1900-12 + assert str(UndateInterval(latest=Undate(1900, 12))) == "../1900-12" + + def test_eq(self): + assert UndateInterval(Undate(2022), Undate(2023)) == UndateInterval( + Undate(2022), Undate(2023) + ) + assert UndateInterval(Undate(2022), Undate(2023, 5)) == UndateInterval( + Undate(2022), Undate(2023, 5) + ) + assert UndateInterval(Undate(2022, 5)) == UndateInterval(Undate(2022, 5)) + + def test_eq_type_check(self): + # doesn't currently support comparison with anything else + interval = UndateInterval(Undate(900)) + # returns NotImplemented if comparison with this type is not supported + assert interval.__eq__("foo") == NotImplemented + + def test_not_eq(self):
+ assert UndateInterval(Undate(2022), Undate(2023)) != UndateInterval( + Undate(2022), Undate(2024) + ) + assert UndateInterval(Undate(2022), Undate(2023, 5)) != UndateInterval( + Undate(2022), Undate(2023, 6) + ) + assert UndateInterval(Undate(2022), Undate(2023, 5)) != UndateInterval( + Undate(2022), Undate(2023) + ) + assert UndateInterval(Undate(2022, 5)) != UndateInterval(Undate(2022, 6)) + + def test_min_year_non_leapyear(self): + assert not calendar.isleap(Undate.MIN_ALLOWABLE_YEAR) + + def test_duration(self): + week_duration = UndateInterval( + Undate(2022, 11, 1), Undate(2022, 11, 7) + ).duration() + assert isinstance(week_duration, Timedelta) + assert week_duration.days == 7 + + twomonths = UndateInterval(Undate(2022, 11), Undate(2022, 12)).duration() + # november - december = 30 days + 31 days + assert twomonths.days == 30 + 31 + + twoyears = UndateInterval(Undate(2021), Undate(2022)).duration() + assert twoyears.days == 365 * 2 + + # special case: month/day with no year (assumes same year) + week_noyear_duration = UndateInterval( + Undate(None, 11, 1), Undate(None, 11, 7) + ).duration() + assert week_noyear_duration.days == 7 + # special case 2: month/day with no year, wrapping from december to january + # (assumes sequential years) + month_noyear_duration = UndateInterval( + Undate(None, 12, 1), Undate(None, 1, 1) + ).duration() + assert month_noyear_duration.days == 32 + + # real world test cases from Shakespeare and Company Project data; + # second date is a year minus one day in the future + month_noyear_duration = UndateInterval( + Undate(None, 6, 7), Undate(None, 6, 6) + ).duration() + assert month_noyear_duration.days == 365 + + # durations that span february in unknown years should assume + # non-leap years + jan_march_duration = UndateInterval( + Undate(None, 2, 28), Undate(None, 3, 1) + ).duration() + assert jan_march_duration.days == 2 + + # duration is not supported for open-ended intervals + with pytest.raises( + NotImplementedError, 
match="Cannot calculate.*open-ended interval" + ): + assert UndateInterval(Undate(2000), None).duration() + + # one year set and the other not currently raises not implemented error + with pytest.raises(NotImplementedError): + UndateInterval(Undate(2000), Undate(month=10)).duration() + + def test_intersection(self): + century11th = UndateInterval(Undate(1001), Undate(1100)) + century20th = UndateInterval(Undate(1901), Undate(2000)) + # no intersection + assert century11th.intersection(century20th) is None + # should work in either direction + assert century20th.intersection(century11th) is None + + decade1990s = UndateInterval(Undate(1990), Undate(1999)) + # intersection of an interval completely contained in another + # returns an interval equivalent to the smaller one + assert century20th.intersection(decade1990s) == decade1990s + assert decade1990s.intersection(century20th) == decade1990s + + # partial overlap + nineties_oughts = UndateInterval(Undate(1990), Undate(2009)) + assert century20th.intersection(nineties_oughts) == UndateInterval( + Undate(1990), Undate(2000) + ) + + # intersections between half open intervals + after_c11th = UndateInterval(Undate(1001), None) + assert after_c11th.intersection(century20th) == century20th + assert after_c11th.intersection(decade1990s) == decade1990s + + before_20th = UndateInterval(None, Undate(1901)) + assert before_20th.intersection(decade1990s) is None + assert before_20th.intersection(century11th) == century11th + assert before_20th.intersection(after_c11th) == UndateInterval( + Undate(1001), Undate(1901) + ) + + def test_contains(self): + century11th = UndateInterval(Undate(1001), Undate(1100)) + century20th = UndateInterval(Undate(1901), Undate(2000)) + decade1990s = UndateInterval(Undate(1990), Undate(1999)) + # an interval DOES contain itself + for interval in [century11th, century20th, decade1990s]: + assert interval in interval + + # checking if an interval is within another interval + assert decade1990s in 
century20th + assert decade1990s not in century11th + assert century11th not in decade1990s + assert century20th not in decade1990s + # a specific date can be contained by an interval + y2k = Undate(2000) + assert y2k in century20th + assert y2k not in century11th + # partially known date should work too + april_someyear = Undate("198X", 4) + assert april_someyear in century20th + assert april_someyear not in century11th + # conversion from datetime.date also works + assert datetime.date(1922, 5, 1) in century20th + # unsupported types result in a type error + with pytest.raises(TypeError): + assert "nineteen-eighty-four" in century20th + + # contains check with half-open intervals + after_c11th = UndateInterval(Undate(1001), None) + before_20th = UndateInterval(None, Undate(1901)) + # neither of them contains the other + assert after_c11th not in before_20th + assert before_20th not in after_c11th + # nor are they contained by a smaller range + assert after_c11th not in decade1990s + assert before_20th not in decade1990s + + # all of our previous test dates are in the 1900s, + # so they are after the 11th century and not before the 20th + for period in [decade1990s, y2k, april_someyear]: + assert period in after_c11th + assert period not in before_20th + + # fully open interval - is this even meaningful? + whenever = UndateInterval(None, None) + assert decade1990s in whenever + # NOTE: an interval contains itself or an equivalent interval, + # but that may not make sense for open intervals... 
+ assert whenever in whenever diff --git a/tests/test_undate.py b/tests/test_undate.py index 65360d3..18e03b0 100644 --- a/tests/test_undate.py +++ b/tests/test_undate.py @@ -1,9 +1,10 @@ -import calendar -from datetime import date +from datetime import date, datetime import pytest -from undate.date import DatePrecision, Timedelta -from undate.undate import Undate, UndateInterval + +from undate import Undate, UndateInterval, Calendar +from undate.converters.base import BaseCalendarConverter +from undate.date import Date, DatePrecision, Timedelta class TestUndate: @@ -25,11 +26,12 @@ def test_partially_known_str(self): # assert str(Undate(2022, day=7)) == "2022-XX-07" @ currently returns 2022-07 def test_repr(self): - assert repr(Undate(2022, 11, 7)) == "" + assert repr(Undate(2022, 11, 7)) == "" assert ( repr(Undate(2022, 11, 7, label="A Special Day")) - == "" + == "" ) + assert repr(Undate(484, calendar=Calendar.ISLAMIC)) == "" def test_init_str(self): assert Undate("2000").earliest.year == 2000 @@ -117,20 +119,51 @@ def test_init_partially_known_day(self): # (currently causes an exception because min/max years are not leap years) # Undate(None, 2, 29) + def test_calendar(self): + assert Undate(2024).calendar == Calendar.GREGORIAN + # by name, any case + assert Undate(848, calendar="ISLAMIC").calendar == Calendar.ISLAMIC + assert Undate(848, calendar="islamic").calendar == Calendar.ISLAMIC + # by enum + assert Undate(848, calendar=Calendar.ISLAMIC).calendar == Calendar.ISLAMIC + # invalid + with pytest.raises(ValueError, match="Calendar `foobar` is not supported"): + Undate(848, calendar="foobar") + def test_init_invalid(self): with pytest.raises(ValueError): - Undate("19xx") + Undate("19??") + + with pytest.raises(ValueError, match="At least one of year, month, or day"): + Undate() def test_invalid_date(self): # invalid month should raise an error with pytest.raises(ValueError): Undate(1990, 22) - def test_from_datetime_date(self): - undate_from_date = 
Undate.from_datetime_date(date(2001, 3, 5)) + def test_to_undate(self): + undate_from_date = Undate.to_undate(date(2001, 3, 5)) assert isinstance(undate_from_date, Undate) assert undate_from_date == Undate(2001, 3, 5) + now = datetime.now() + undate_from_dt = Undate.to_undate(now) + assert isinstance(undate_from_dt, Undate) + assert undate_from_dt == Undate(now.year, now.month, now.day) + + # from internal Date object + y2k = Date(2000) + y2k_to_undate = Undate.to_undate(y2k) + assert isinstance(y2k_to_undate, Undate) + assert int(y2k_to_undate.year) == y2k.year + assert y2k_to_undate.month is None + assert y2k_to_undate.day is None + + # unsupported type + with pytest.raises(TypeError): + Undate.to_undate("foo") + # test properties for accessing parts of date def test_year_property(self): # two, three, four five digit years; numeric and string @@ -143,10 +176,11 @@ def test_year_property(self): # unset year assert Undate(month=12, day=31).year == "XXXX" + # NOTE: initializing undate with no date information is no longer supported # force method to hit conditional for date precision - some_century = Undate() - some_century.precision = DatePrecision.CENTURY - assert some_century.year is None + # some_century = Undate() + # some_century.precision = DatePrecision.CENTURY + # assert some_century.year is None def test_month_property(self): # one, two digit month @@ -438,117 +472,10 @@ def test_format(self): Undate(1984).format("%Y-%m") -class TestUndateInterval: - def test_str(self): - # 2022 - 2023 - assert str(UndateInterval(Undate(2022), Undate(2023))) == "2022/2023" - # 2022 - 2023-05 - assert str(UndateInterval(Undate(2022), Undate(2023, 5))) == "2022/2023-05" - # 2022-11-01 to 2022-11-07 - assert ( - str(UndateInterval(Undate(2022, 11, 1), Undate(2023, 11, 7))) - == "2022-11-01/2023-11-07" - ) - - def test_format(self): - interval = UndateInterval(Undate(2000), Undate(2001)) - assert interval.format("EDTF") == "2000/2001" - assert interval.format("ISO8601") ==
"2000/2001" - - # Open-ended intervals - open_start = UndateInterval(latest=Undate(2000)) - assert open_start.format("EDTF") == "../2000" - assert open_start.format("ISO8601") == "/2000" - - open_end = UndateInterval(earliest=Undate(2000)) - assert open_end.format("EDTF") == "2000/.." - assert open_end.format("ISO8601") == "2000/" - - def test_repr(self): - assert ( - repr(UndateInterval(Undate(2022), Undate(2023))) - == "" - ) - assert ( - repr(UndateInterval(Undate(2022), Undate(2023), label="Fancy Epoch")) - == "" - ) - - def test_str_open_range(self): - # 900 - - assert str(UndateInterval(Undate(900))) == "0900/" - # - 1900 - assert str(UndateInterval(latest=Undate(1900))) == "../1900" - # - 1900-12 - assert str(UndateInterval(latest=Undate(1900, 12))) == "../1900-12" - - def test_eq(self): - assert UndateInterval(Undate(2022), Undate(2023)) == UndateInterval( - Undate(2022), Undate(2023) - ) - assert UndateInterval(Undate(2022), Undate(2023, 5)) == UndateInterval( - Undate(2022), Undate(2023, 5) - ) - assert UndateInterval(Undate(2022, 5)) == UndateInterval(Undate(2022, 5)) - - def test_not_eq(self): - assert UndateInterval(Undate(2022), Undate(2023)) != UndateInterval( - Undate(2022), Undate(2024) - ) - assert UndateInterval(Undate(2022), Undate(2023, 5)) != UndateInterval( - Undate(2022), Undate(2023, 6) - ) - assert UndateInterval(Undate(2022), Undate(2023, 5)) != UndateInterval( - Undate(2022), Undate(2023) - ) - assert UndateInterval(Undate(2022, 5)) != UndateInterval(Undate(2022, 6)) - - def test_min_year_non_leapyear(self): - assert not calendar.isleap(Undate.MIN_ALLOWABLE_YEAR) - - def test_duration(self): - week_duration = UndateInterval( - Undate(2022, 11, 1), Undate(2022, 11, 7) - ).duration() - assert isinstance(week_duration, Timedelta) - assert week_duration.days == 7 - - twomonths = UndateInterval(Undate(2022, 11), Undate(2022, 12)).duration() - # november - december = 30 days + 31 days - assert twomonths.days == 30 + 31 - - twoyears = 
UndateInterval(Undate(2021), Undate(2022)).duration() - assert twoyears.days == 365 * 2 - - # special case: month/day with no year (assumes same year) - week_noyear_duration = UndateInterval( - Undate(None, 11, 1), Undate(None, 11, 7) - ).duration() - assert week_noyear_duration.days == 7 - # special case 2: month/day with no year, wrapping from december to january - # (assumes sequential years) - month_noyear_duration = UndateInterval( - Undate(None, 12, 1), Undate(None, 1, 1) - ).duration() - assert month_noyear_duration.days == 32 - - # real world test cases from Shakespeare and Company Project data; - # second date is a year minus one day in the future - month_noyear_duration = UndateInterval( - Undate(None, 6, 7), Undate(None, 6, 6) - ).duration() - assert month_noyear_duration.days == 365 - - # durations that span february in unknown years should assume - # non-leap years - jan_march_duration = UndateInterval( - Undate(None, 2, 28), Undate(None, 3, 1) - ).duration() - assert jan_march_duration.days == 2 - - # duration is not supported for open-ended intervals - assert UndateInterval(Undate(2000), None).duration() == NotImplemented - - # one year set and the other not currently raises not implemented error - with pytest.raises(NotImplementedError): - UndateInterval(Undate(2000), Undate()).duration() +def test_calendar_get_converter(): + # ensure we can retrieve a calendar converter for each + # calendar named in our calendar enum + for cal in Calendar: + converter = Calendar.get_converter(cal) + assert isinstance(converter, BaseCalendarConverter) + assert converter.name.lower() == cal.name.lower()