Skip to content

Commit e4c468d

Browse files
committed
Make Gregorian parser case-insensitive
1 parent 9ca8424 commit e4c468d

File tree

4 files changed

+24
-20
lines changed

4 files changed

+24
-20
lines changed

scripts/generate_gregorian_grammar.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
with month names (full and abbreviated) based on the list of
55
target languages.
66
7-
Run this script with hatch to regeneate the file::
7+
Run this script with hatch to regenerate the file::
88
99
hatch run codegen:generate
1010
@@ -48,7 +48,7 @@ def main():
4848
for width in ["wide", "abbreviated"]:
4949
for month_num, month_name in get_month_names(width, locale=lang).items():
5050
# some locales use a . on the shortened month; let's ignore that
51-
month_name = month_name.strip(".")
51+
month_name = month_name.strip(".").lower()
5252
# In some cases different languages have the same abbreviations;
5353
# in some cases, abbreviated and full are the same.
5454
# Only add if not already present, to avoid redundancy
@@ -59,11 +59,12 @@ def main():
5959
outfile.write(warning_text)
6060

6161
# for each numeric month, generate a rule with all variant names:
62-
# month_1: "January" | "Jan" ...
62+
# month_1: /January|Jan/i
6363
for i, names in all_month_names.items():
64-
# combine all names in an OR string
65-
or_names = " | ".join(f'"{m}"' for m in names)
66-
outfile.write(f"month_{i}: {or_names}\n")
64+
# combine all names in a case-insensitive OR regex
65+
# sort longest variants first so a short name (e.g. "jan") cannot match before a longer one (e.g. "janvier")
66+
or_names = "|".join(sorted(names, key=len, reverse=True))
67+
outfile.write(f"month_{i}: /({or_names})/i\n")
6768

6869
print(
6970
f"Successfully regenerated {MONTH_GRAMMAR_FILE.relative_to(pathlib.Path.cwd())}"

src/undate/converters/calendars/gregorian/converter.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ def parse(self, value: str) -> Undate:
100100

101101
# parse the input string, then transform to undate object
102102
try:
103-
# parse the string with our Hebrew date parser
103+
# parse the string with our Gregorian date parser
104104
parsetree = gregorian_parser.parse(value)
105105
# transform the parse tree into an undate object
106106
undate_obj = self.transformer.transform(parsetree)
Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
11
// WARNING: This file is auto-generated. DO NOT EDIT.
22
// To regenerate: hatch run codegen:generate
33

4-
month_1: "January" | "Jan" | "enero" | "ene" | "janvier" | "janv" | "Januar" | "Mutarama" | "mut" | "Janwaliyo" | "ጥሪ"
5-
month_2: "February" | "Feb" | "febrero" | "feb" | "février" | "févr" | "Februar" | "Gashyantare" | "gas" | "Febwaliyo" | "ለካቲት" | "ለካ"
6-
month_3: "March" | "Mar" | "marzo" | "mar" | "mars" | "März" | "Werurwe" | "wer" | "Marisi" | "መጋቢት" | "መጋ"
7-
month_4: "April" | "Apr" | "abril" | "abr" | "avril" | "avr" | "Mata" | "mat" | "Apuli" | "Apu" | "ሚያዝያ" | "ሚያ"
8-
month_5: "May" | "mayo" | "may" | "mai" | "Mai" | "Gicurasi" | "gic" | "Maayi" | "Maa" | "ጉንበት" | "ግን"
9-
month_6: "June" | "Jun" | "junio" | "jun" | "juin" | "Juni" | "Kamena" | "kam" | "Juuni" | "Juu" | "ሰነ"
10-
month_7: "July" | "Jul" | "julio" | "jul" | "juillet" | "juil" | "Juli" | "Nyakanga" | "nya" | "Julaayi" | "ሓምለ" | "ሓም"
11-
month_8: "August" | "Aug" | "agosto" | "ago" | "août" | "Kanama" | "kan" | "Agusito" | "Agu" | "ነሓሰ" | "ነሓ"
12-
month_9: "September" | "Sep" | "septiembre" | "sept" | "septembre" | "Sept" | "Nzeri" | "nze" | "Sebuttemba" | "Seb" | "መስከረም" | "መስ"
13-
month_10: "October" | "Oct" | "octubre" | "oct" | "octobre" | "Oktober" | "Okt" | "Ukwakira" | "ukw" | "Okitobba" | "Oki" | "ጥቅምቲ" | "ጥቅ"
14-
month_11: "November" | "Nov" | "noviembre" | "nov" | "novembre" | "Ugushyingo" | "ugu" | "Novemba" | "ሕዳር" | "ሕዳ"
15-
month_12: "December" | "Dec" | "diciembre" | "dic" | "décembre" | "déc" | "Dezember" | "Dez" | "Ukuboza" | "uku" | "Desemba" | "Des" | "ታሕሳስ" | "ታሕ"
4+
month_1: /(janwaliyo|mutarama|january|janvier|januar|enero|janv|jan|ene|mut|ጥሪ)/i
5+
month_2: /(gashyantare|febwaliyo|february|febrero|février|februar|févr|ለካቲት|feb|gas|ለካ)/i
6+
month_3: /(werurwe|marisi|march|marzo|mars|märz|መጋቢት|mar|wer|መጋ)/i
7+
month_4: /(april|abril|avril|apuli|mata|ሚያዝያ|apr|abr|avr|mat|apu|ሚያ)/i
8+
month_5: /(gicurasi|maayi|mayo|ጉንበት|may|mai|gic|maa|ግን)/i
9+
month_6: /(kamena|junio|juuni|june|juin|juni|jun|kam|juu|ሰነ)/i
10+
month_7: /(nyakanga|juillet|julaayi|julio|july|juil|juli|jul|nya|ሓምለ|ሓም)/i
11+
month_8: /(agusito|august|agosto|kanama|août|aug|ago|kan|agu|ነሓሰ|ነሓ)/i
12+
month_9: /(septiembre|sebuttemba|september|septembre|nzeri|መስከረም|sept|sep|nze|seb|መስ)/i
13+
month_10: /(ukwakira|okitobba|october|octubre|octobre|oktober|ጥቅምቲ|oct|okt|ukw|oki|ጥቅ)/i
14+
month_11: /(ugushyingo|noviembre|november|novembre|novemba|nov|ugu|ሕዳር|ሕዳ)/i
15+
month_12: /(diciembre|december|décembre|dezember|ukuboza|desemba|ታሕሳስ|dec|dic|déc|dez|uku|des|ታሕ)/i

tests/test_converters/test_calendars/test_gregorian/test_gregorian_parser.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,9 @@
3838
# French
3939
"18 avril 2025",
4040
"18 avr. 2025",
41+
# case-insensitive
42+
"18 JUNE 2025",
43+
"Avril 2025",
4144
]
4245

4346

@@ -51,7 +54,7 @@ def test_should_parse(date_string):
5154
("0 June 1006", UnexpectedCharacters),
5255
("42 March 1206", UnexpectedCharacters),
5356
# month alone
54-
("Juin", UnexpectedCharacters),
57+
("Juin", UnexpectedEOF),
5558
# day only
5659
("12 ", UnexpectedEOF),
5760
# non-Gregorian month

0 commit comments

Comments
 (0)