From 3012907d71fad50137a84e0fdf3cf45d93534a82 Mon Sep 17 00:00:00 2001 From: Filip Niklas Date: Tue, 1 Jul 2025 00:04:59 +0200 Subject: [PATCH 01/33] chore: add another entry for the same year --- tests/mocks/test.bib | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/tests/mocks/test.bib b/tests/mocks/test.bib index 1750d63..3a052ba 100644 --- a/tests/mocks/test.bib +++ b/tests/mocks/test.bib @@ -57,6 +57,18 @@ @book{hegel1991logic address = {Atlantic Highlands, N.J.} } +@book{hegel1991encyclopaedialogic, + title = {The Encyclopaedia Logic: Part I of the + Encyclopaedia of Philosophical Sciences + with the Zusatze}, + author = {Hegel, G.W.F.}, + translator = {Harris, H.S. and Suchting, W.A. and Geraets, T.F.}, + year = {1991}, + address = {Indianapolis/Cambridge}, + publisher = {Hackett Publishing Company Inc.}, + isbn = {0-87220-070-1} +} + @book{houlgate2022hegel, title = {Hegel on Being}, author = {Houlgate, S.}, @@ -92,14 +104,14 @@ @book{mctaggart1910hegel address = {Cambridge} } -@article{James_Knappik_2024, - title={Introduction to Part 2 of the Themed Issue, ‘Racism and Colonialism in Hegel’s Philosophy’: Common Objections and Questions for Future Research}, - volume={45}, - DOI={10.1017/hgl.2024.38}, - number={2}, - journal={Hegel Bulletin}, - author={James, Daniel and Knappik, Franz}, +@article{James_Knappik_2024, + title = {Introduction to Part 2 of the Themed Issue, ‘Racism and Colonialism in Hegel’s Philosophy’: Common Objections and Questions for Future Research}, + volume = {45}, + doi = {10.1017/hgl.2024.38}, + number = {2}, + journal = {Hegel Bulletin}, + author = {James, Daniel and Knappik, Franz}, translator = {Guyer, Paul and Wood, Allen W.}, - year={2024}, - pages={181–184} + year = {2024}, + pages = {181–184} } \ No newline at end of file From bc3656e558d78fcdbda6c8801e56dc8be30ad158 Mon Sep 17 00:00:00 2001 From: Filip Niklas Date: Tue, 1 Jul 2025 00:11:14 +0200 Subject: [PATCH 02/33] change: 
regex matching to include keys --- src/validators.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/validators.rs b/src/validators.rs index 43250f8..a5d4679 100644 --- a/src/validators.rs +++ b/src/validators.rs @@ -226,6 +226,14 @@ fn check_parentheses_balance(markdown: &String) -> bool { fn extract_citations_from_markdown(markdown: &String) -> Vec { // Regex explanation // + // \( and \) match parentheses + // (see\s)? optionally matches "see " + // @? optionally matches an @ symbol (for citation keys) + // [A-Za-z] matches the first letter of the author name or key + // [^()]*? matches everything except parentheses (non-greedy) + // \d* matches optional digits (for years or digits inside keys) + // (?:,[^)]*)? optionally matches a comma and any text afterward (page numbers, locators) + // \( Match an opening parenthesis // (see\s)? Optionally match the word "see" followed by a whitespace // ([A-Z] Match a capital letter @@ -239,7 +247,7 @@ fn extract_citations_from_markdown(markdown: &String) -> Vec { // // The regex will match citations in the format (Author_last_name 2021) or (Author_last_name 2021, 123) // - let citation_regex = Regex::new(r"\((see\s)?([A-Z][^()]*?\d+(?:,[^)]*)?)\)").unwrap(); + let citation_regex = Regex::new(r"\((see\s)?(@?[A-Za-z][^()]*?\d*(?:,[^)]*)?)\)").unwrap(); let mut citations = Vec::new(); for line in markdown.lines() { From 69c6f8257fc2f6afb7eeb5f49146d797efd52320 Mon Sep 17 00:00:00 2001 From: Filip Niklas Date: Tue, 1 Jul 2025 00:16:35 +0200 Subject: [PATCH 03/33] fix: regex --- src/validators.rs | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/src/validators.rs b/src/validators.rs index a5d4679..0fe8822 100644 --- a/src/validators.rs +++ b/src/validators.rs @@ -226,28 +226,29 @@ fn check_parentheses_balance(markdown: &String) -> bool { fn extract_citations_from_markdown(markdown: &String) -> Vec { // Regex explanation // - // \( and \) 
match parentheses - // (see\s)? optionally matches "see " - // @? optionally matches an @ symbol (for citation keys) - // [A-Za-z] matches the first letter of the author name or key - // [^()]*? matches everything except parentheses (non-greedy) - // \d* matches optional digits (for years or digits inside keys) - // (?:,[^)]*)? optionally matches a comma and any text afterward (page numbers, locators) - - // \( Match an opening parenthesis - // (see\s)? Optionally match the word "see" followed by a whitespace - // ([A-Z] Match a capital letter - // [^()]*? Match any character except opening and closing parenthesis - // \d+ Match one or more digits - // (?: Start a non-capturing group - // , Match a comma - // [^)]* Match any character except closing parenthesis - // )? End the non-capturing group and make it optional - // \) Match a closing parenthesis + // \( Match an opening parenthesis + // (see\s)? Optionally match the word "see" followed by a whitespace + // ( Start capturing group for citation content + // @[^(),\s]+ Match @ followed by one or more non-comma, non-parenthesis, non-whitespace chars + // | OR + // [A-Z] Match a capital letter (traditional author format) + // [^()]*? Match any character except parentheses (non-greedy) + // \d+ Match one or more digits (year) + // ) End capturing group + // (?: Start non-capturing group for optional page numbers + // , Match a comma + // [^)]* Match any character except closing parenthesis + // )? 
End non-capturing group, make it optional + // \) Match a closing parenthesis // - // The regex will match citations in the format (Author_last_name 2021) or (Author_last_name 2021, 123) + // The regex will match citations in these formats: + // - (@hegel1991logic, 123) + // - (see @hegel1991logic, 123) + // - (Hegel 2022, 123) + // - (see Hegel 2022, 123) // - let citation_regex = Regex::new(r"\((see\s)?(@?[A-Za-z][^()]*?\d*(?:,[^)]*)?)\)").unwrap(); + let citation_regex = + Regex::new(r"\((see\s)?(@[^(),\s]+|[A-Z][^()]*?\d+)(?:,[^)]*)?\)").unwrap(); let mut citations = Vec::new(); for line in markdown.lines() { From 74d31abaa96bb1b66c4caf7eeaa1b01fc06c7b8b Mon Sep 17 00:00:00 2001 From: Filip Niklas Date: Tue, 1 Jul 2025 00:18:18 +0200 Subject: [PATCH 04/33] fix: regex2 --- src/validators.rs | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/src/validators.rs b/src/validators.rs index 0fe8822..e97d17f 100644 --- a/src/validators.rs +++ b/src/validators.rs @@ -226,20 +226,20 @@ fn check_parentheses_balance(markdown: &String) -> bool { fn extract_citations_from_markdown(markdown: &String) -> Vec { // Regex explanation // - // \( Match an opening parenthesis - // (see\s)? Optionally match the word "see" followed by a whitespace - // ( Start capturing group for citation content - // @[^(),\s]+ Match @ followed by one or more non-comma, non-parenthesis, non-whitespace chars - // | OR - // [A-Z] Match a capital letter (traditional author format) - // [^()]*? Match any character except parentheses (non-greedy) - // \d+ Match one or more digits (year) - // ) End capturing group - // (?: Start non-capturing group for optional page numbers - // , Match a comma - // [^)]* Match any character except closing parenthesis - // )? End non-capturing group, make it optional - // \) Match a closing parenthesis + // \( Match an opening parenthesis + // (see\s)? 
Optionally match the word "see" followed by a whitespace + // ( Start capturing group for citation content + // @[^(),\s]+ Match @ followed by one or more non-comma, non-parenthesis, non-whitespace chars + // | OR + // [A-Z] Match a capital letter (traditional author format) + // [^()]*? Match any character except parentheses (non-greedy) + // \d+ Match one or more digits (year) + // ) End capturing group + // (?: Start non-capturing group for optional page numbers + // , Match a comma + // [^)]* Match any character except closing parenthesis + // )? End non-capturing group, make it optional + // \) Match a closing parenthesis // // The regex will match citations in these formats: // - (@hegel1991logic, 123) @@ -248,7 +248,8 @@ fn extract_citations_from_markdown(markdown: &String) -> Vec { // - (see Hegel 2022, 123) // let citation_regex = - Regex::new(r"\((see\s)?(@[^(),\s]+|[A-Z][^()]*?\d+)(?:,[^)]*)?\)").unwrap(); + Regex::new(r"\((see\s)?((@[^(),\s]+|[A-Z][^()]*?\d+)(?:,[^)]*)?)?\)").unwrap(); + let mut citations = Vec::new(); for line in markdown.lines() { From a2a21962368cb1de4c5973a2ba9dfa120af3c66c Mon Sep 17 00:00:00 2001 From: Filip Niklas Date: Tue, 1 Jul 2025 00:20:30 +0200 Subject: [PATCH 05/33] fix: regex3 --- src/validators.rs | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/src/validators.rs b/src/validators.rs index e97d17f..31fbc43 100644 --- a/src/validators.rs +++ b/src/validators.rs @@ -226,20 +226,14 @@ fn check_parentheses_balance(markdown: &String) -> bool { fn extract_citations_from_markdown(markdown: &String) -> Vec { // Regex explanation // - // \( Match an opening parenthesis - // (see\s)? Optionally match the word "see" followed by a whitespace - // ( Start capturing group for citation content - // @[^(),\s]+ Match @ followed by one or more non-comma, non-parenthesis, non-whitespace chars - // | OR - // [A-Z] Match a capital letter (traditional author format) - // [^()]*? 
Match any character except parentheses (non-greedy) - // \d+ Match one or more digits (year) - // ) End capturing group - // (?: Start non-capturing group for optional page numbers - // , Match a comma - // [^)]* Match any character except closing parenthesis - // )? End non-capturing group, make it optional - // \) Match a closing parenthesis + // \( Match an opening parenthesis + // (see\s)? Optionally match the word "see" followed by a whitespace + // ( Start capturing group for citation content + // @[^(),\s]+(?:,[^)]*)? Match @ key with optional page numbers + // | OR + // [A-Z][^()]*?\d+(?:,[^)]*)? Match traditional author format with optional page numbers + // ) End capturing group + // \) Match a closing parenthesis // // The regex will match citations in these formats: // - (@hegel1991logic, 123) @@ -248,7 +242,7 @@ fn extract_citations_from_markdown(markdown: &String) -> Vec { // - (see Hegel 2022, 123) // let citation_regex = - Regex::new(r"\((see\s)?((@[^(),\s]+|[A-Z][^()]*?\d+)(?:,[^)]*)?)?\)").unwrap(); + Regex::new(r"\((see\s)?(@[^(),\s]+(?:,[^)]*)?|[A-Z][^()]*?\d+(?:,[^)]*)?)\)").unwrap(); let mut citations = Vec::new(); From df394c96688c410a7255cb44639f4c70f5ca2edf Mon Sep 17 00:00:00 2001 From: Filip Niklas Date: Tue, 1 Jul 2025 00:23:09 +0200 Subject: [PATCH 06/33] chore: add single citation key test --- src/validators.rs | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/validators.rs b/src/validators.rs index 31fbc43..170100b 100644 --- a/src/validators.rs +++ b/src/validators.rs @@ -375,12 +375,21 @@ mod tests_citation_extraction { let citations = extract_citations_from_markdown(&markdown); assert_eq!(citations, vec!["Hegel 2021"]); } + + #[test] + fn single_citation_key() { + let markdown = String::from("This is a citation (@hegel1991logic) in the text."); + let citations = extract_citations_from_markdown(&markdown); + assert_eq!(citations, vec!["@hegel1991logic"]); + } + #[test] fn single_citation_prefixed_see() { 
let markdown = String::from("This is a citation (see Hegel 2021) in the text."); let citations = extract_citations_from_markdown(&markdown); assert_eq!(citations, vec!["Hegel 2021"]); } + #[test] fn multiple_citations() { let markdown = @@ -388,6 +397,7 @@ mod tests_citation_extraction { let citations = extract_citations_from_markdown(&markdown); assert_eq!(citations, vec!["Spinoza 2021", "Kant 2020, 123"]); } + #[test] fn multiple_citations_prefixed_see() { let markdown = String::from( @@ -396,18 +406,21 @@ mod tests_citation_extraction { let citations = extract_citations_from_markdown(&markdown); assert_eq!(citations, vec!["Spinoza 2021", "Kant 2020, 123"]); } + #[test] fn no_citation() { let markdown = String::from("This text has no citations."); let citations = extract_citations_from_markdown(&markdown); assert_eq!(citations, Vec::::new()); } + #[test] fn citation_with_additional_text() { let markdown = String::from("This citation (Plato 2019) has additional text."); let citations = extract_citations_from_markdown(&markdown); assert_eq!(citations, vec!["Plato 2019"]); } + #[test] fn multiple_lines() { let markdown = String::from( @@ -418,18 +431,21 @@ mod tests_citation_extraction { let citations = extract_citations_from_markdown(&markdown); assert_eq!(citations, vec!["Aristotle 2020", "Hume 2018"]); } + #[test] fn incomplete_citation_opening_parenthesis_only() { let markdown = String::from("This is an incomplete citation (Spinoza 2021."); let valid_citations = extract_citations_from_markdown(&markdown); assert!(valid_citations.is_empty()); } + #[test] fn incomplete_citation_closing_parenthesis_only() { let markdown = String::from("This is an incomplete citation Descartes 2021)."); let valid_citations = extract_citations_from_markdown(&markdown); assert!(valid_citations.is_empty()); } + #[test] fn mixed_valid_and_invalid_citations() { let markdown = @@ -448,11 +464,13 @@ mod tests_validate_citations { let citations = vec!["Hegel 2021".to_string(), "Kant 2020, 
123".to_string()]; assert!(verify_citations_format(&citations).is_ok()); } + #[test] fn missing_year() { let citations = vec!["Hegel".to_string(), "Kant 2020, 123".to_string()]; assert!(verify_citations_format(&citations).is_err()); } + #[test] fn invalid_citation_extra_comma() { let citations = vec![ @@ -462,6 +480,7 @@ mod tests_validate_citations { ]; assert!(verify_citations_format(&citations).is_err()); } + #[test] fn valid_citations_set() { let citations = vec![ @@ -474,18 +493,21 @@ mod tests_validate_citations { let citations_set = create_citations_set(citations); assert_eq!(citations_set, vec!["Hegel 2021", "Kant 2020"]); } + #[test] fn empty_citations_set() { let citations = Vec::::new(); let citations_set = create_citations_set(citations); assert!(citations_set.is_empty()); } + #[test] fn invalid_citations_set() { let citations = vec!["Hegel 2021".to_string(), "Kant, 2020, 123".to_string()]; let citations_set = create_citations_set(citations); assert_eq!(citations_set, vec!["Hegel 2021", "Kant"]); } + // TODO what happened here? 
investigate // #[test] // fn test_match_citations_to_bibliography() { From bc381b18d17a15c17742d6152e8bc4501b1327e6 Mon Sep 17 00:00:00 2001 From: Filip Niklas Date: Tue, 1 Jul 2025 00:26:26 +0200 Subject: [PATCH 07/33] chore: add more tests for citation key --- src/validators.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/validators.rs b/src/validators.rs index 170100b..ac5b68f 100644 --- a/src/validators.rs +++ b/src/validators.rs @@ -398,6 +398,15 @@ mod tests_citation_extraction { assert_eq!(citations, vec!["Spinoza 2021", "Kant 2020, 123"]); } + #[test] + fn multiple_citations_with_key() { + let markdown = String::from( + "This is a citation (@spinoza2021logic) and another one (@kant2020logic, 123).", + ); + let citations = extract_citations_from_markdown(&markdown); + assert_eq!(citations, vec!["@spinoza2021logic", "@kant2020logic, 123"]); + } + #[test] fn multiple_citations_prefixed_see() { let markdown = String::from( @@ -407,6 +416,15 @@ mod tests_citation_extraction { assert_eq!(citations, vec!["Spinoza 2021", "Kant 2020, 123"]); } + #[test] + fn multiple_citations_mixed_prefixed_see() { + let markdown = String::from( + "This is a citation (see Spinoza 2021) and another one (see @kant2020logic, 123).", + ); + let citations = extract_citations_from_markdown(&markdown); + assert_eq!(citations, vec!["Spinoza 2021", "@kant2020logic, 123"]); + } + #[test] fn no_citation() { let markdown = String::from("This text has no citations."); From 988b0a2e32a8b53ea16cb6a72b4d5a16c0043e97 Mon Sep 17 00:00:00 2001 From: Filip Niklas Date: Tue, 1 Jul 2025 00:31:18 +0200 Subject: [PATCH 08/33] chore: clean up comment --- src/validators.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/validators.rs b/src/validators.rs index ac5b68f..7fb40e4 100644 --- a/src/validators.rs +++ b/src/validators.rs @@ -226,14 +226,14 @@ fn check_parentheses_balance(markdown: &String) -> bool { fn 
extract_citations_from_markdown(markdown: &String) -> Vec { // Regex explanation // - // \( Match an opening parenthesis - // (see\s)? Optionally match the word "see" followed by a whitespace - // ( Start capturing group for citation content - // @[^(),\s]+(?:,[^)]*)? Match @ key with optional page numbers - // | OR - // [A-Z][^()]*?\d+(?:,[^)]*)? Match traditional author format with optional page numbers - // ) End capturing group - // \) Match a closing parenthesis + // \( Match an opening parenthesis + // (see\s)? Optionally match the word "see" followed by a whitespace + // ( Start capturing group for citation content + // @[^(),\s]+(?:,[^)]*)? Match @ key with optional page numbers + // | OR + // [A-Z][^()]*?\d+(?:,[^)]*)? Match traditional author format with optional page numbers + // ) End capturing group + // \) Match a closing parenthesis // // The regex will match citations in these formats: // - (@hegel1991logic, 123) From b0985ce6341c223fad7b1bc4eeba7851de6108f1 Mon Sep 17 00:00:00 2001 From: Filip Niklas Date: Tue, 1 Jul 2025 00:38:16 +0200 Subject: [PATCH 09/33] change: update bibliography matching to support key --- src/validators.rs | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/src/validators.rs b/src/validators.rs index 7fb40e4..de3f11f 100644 --- a/src/validators.rs +++ b/src/validators.rs @@ -316,17 +316,33 @@ fn match_citations_to_bibliography( for citation in citations { for entry in bibliography { - let author = entry.author().unwrap(); - let author_last_name = author[0].name.clone(); + let mut is_match = false; - let date: biblatex::PermissiveType = entry.date().unwrap(); - let year = BiblatexUtils::extract_year_from_date(&date, citation.clone()).unwrap(); + if citation.starts_with('@') { + let citation_key = citation.split(',').next().unwrap().trim(); // Extract the key part (everything before comma if present) + let citation_key = &citation_key[1..]; // Remove @ prefix - let author_year = 
format!("{} {:?}", author_last_name, year); + if entry.key == citation_key { + is_match = true; + } + } else { + let author = entry.author().unwrap(); + let author_last_name = author[0].name.clone(); + + let date: biblatex::PermissiveType = entry.date().unwrap(); + let year = BiblatexUtils::extract_year_from_date(&date, citation.clone()).unwrap(); + + let author_year = format!("{} {:?}", author_last_name, year); + + if citation == author_year { + is_match = true; + } + } - if citation == author_year { + if is_match { unmatched_citations.retain(|x| x != &citation); matched_citations.push(entry.clone()); + break; // Move to next citation once we find a match } } } From 3582da90362bbabaf26736ab20fba8c11aefc0a8 Mon Sep 17 00:00:00 2001 From: Filip Niklas Date: Tue, 1 Jul 2025 00:45:16 +0200 Subject: [PATCH 10/33] chore: expand text doc with citation key examples --- tests/mocks/data/development.mdx | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/mocks/data/development.mdx b/tests/mocks/data/development.mdx index cc0ed5c..1323e88 100644 --- a/tests/mocks/data/development.mdx +++ b/tests/mocks/data/development.mdx @@ -172,3 +172,7 @@ another being whose nature is different; however, with pure being this cannot be case. "Any determination would give it some particular nature, as against some other particular nature—would make it _X_ rather than _not-X_. It has therefore no determination whatever" (McTaggart 1910, 15). + +Some quote (@hegel1991logic, 15). + +Some other quite (@hegel1991encyclopaedialogic, 228). 
From c703907964a91503b29cddfdffc8e4ada0a4539b Mon Sep 17 00:00:00 2001 From: Filip Niklas Date: Tue, 1 Jul 2025 00:50:32 +0200 Subject: [PATCH 11/33] fix: verification for citation keys --- src/validators.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/validators.rs b/src/validators.rs index de3f11f..189f865 100644 --- a/src/validators.rs +++ b/src/validators.rs @@ -267,8 +267,13 @@ fn extract_citations_from_markdown(markdown: &String) -> Vec { /// Verifies the format of the citations extracted from the markdown. /// The citations are expected to be in the format (Author_last_name 2021) /// or (Author_last_name 2021, 123) +/// Citations starting with a @key will be ignored. fn verify_citations_format(citations: &Vec) -> Result<(), io::Error> { for citation in citations { + if citation.starts_with("@") { + continue; + } + let citation_split = citation.splitn(2, ',').collect::>(); let first_part = citation_split[0].trim(); let has_year = first_part.split_whitespace().any(|word| { From 08c533262f0b465a351580fa650950a6a6422bbb Mon Sep 17 00:00:00 2001 From: Filip Niklas Date: Tue, 1 Jul 2025 00:58:40 +0200 Subject: [PATCH 12/33] chore: formatting --- src/transformers.rs | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/src/transformers.rs b/src/transformers.rs index 5b62f82..dad6884 100644 --- a/src/transformers.rs +++ b/src/transformers.rs @@ -13,9 +13,7 @@ pub fn entries_to_strings(entries: Vec) -> Vec { EntryType::Book => { strings_output.push(transform_book_entry(&entry)); } - EntryType::Article => { - strings_output.push(transform_article_entry(&entry)) - } + EntryType::Article => strings_output.push(transform_article_entry(&entry)), _ => println!("Entry type not supported: {:?}", entry.entry_type), } } @@ -61,16 +59,14 @@ fn transform_article_entry(entry: &Entry) -> String { add_authors(author, &mut article_string); add_article_title(title, &mut article_string); - add_journal_volume_number_year_pages( - 
journal, volume, number, year, pages, &mut article_string, - ); + add_journal_volume_number_year_pages(journal, volume, number, year, pages, &mut article_string); add_translators(translators, &mut article_string); add_doi(doi, &mut article_string); article_string.trim_end().to_string() } -/// Generate a string of a type of contributors. +/// Generate a string of a type of contributors. /// E.g. "Edited", "Translated" become "Edited by", "Translated by". /// Handles the case when there are multiple contributors. fn generate_contributors( @@ -104,9 +100,9 @@ fn add_authors(author: Vec, bib_html: &mut String) { author[0].name, author[0].given_name )); } else if author.len() == 2 { - // In Chicago style, when listing multiple authors in a bibliography entry, - // only the first author's name is inverted (i.e., "Last, First"). The second and subsequent - // authors' names are written in standard order (i.e., "First Last"). + // In Chicago style, when listing multiple authors in a bibliography entry, + // only the first author's name is inverted (i.e., "Last, First"). The second and subsequent + // authors' names are written in standard order (i.e., "First Last"). // This rule helps differentiate the primary author from co-authors. bib_html.push_str(&format!( "{}, {} and {} {}. 
", @@ -173,14 +169,16 @@ fn sort_entries(entries: Vec) -> Vec { sorted_entries.sort_by(|a, b| { let a_authors = a.author().unwrap_or_default(); let b_authors = b.author().unwrap_or_default(); - - let a_last_name = a_authors.first() + + let a_last_name = a_authors + .first() .map(|p| p.name.clone().to_lowercase()) .unwrap_or_default(); - let b_last_name = b_authors.first() + let b_last_name = b_authors + .first() .map(|p| p.name.clone().to_lowercase()) .unwrap_or_default(); - + a_last_name.cmp(&b_last_name) }); sorted_entries @@ -240,4 +238,4 @@ fn extract_pages(entry: &Entry) -> String { let pages_permissive = entry.pages().unwrap(); let pages = BiblatexUtils::extract_pages(&pages_permissive); pages -} \ No newline at end of file +} From c86725218729f62c8526625127a884426c51133a Mon Sep 17 00:00:00 2001 From: Filip Niklas Date: Tue, 1 Jul 2025 01:12:00 +0200 Subject: [PATCH 13/33] feat: disambiguiation logic --- src/transformers.rs | 76 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 72 insertions(+), 4 deletions(-) diff --git a/src/transformers.rs b/src/transformers.rs index dad6884..a0442f7 100644 --- a/src/transformers.rs +++ b/src/transformers.rs @@ -1,17 +1,19 @@ use biblatex::{Entry, EntryType}; +use std::collections::HashMap; use utils::BiblatexUtils; use crate::utils; /// Transform a list of entries into a list of strings according to the Chicago bibliography style. 
pub fn entries_to_strings(entries: Vec) -> Vec { + let entries_clone = entries.clone(); let sorted_entries = sort_entries(entries); let mut strings_output: Vec = Vec::new(); for entry in sorted_entries { match entry.entry_type { EntryType::Book => { - strings_output.push(transform_book_entry(&entry)); + strings_output.push(transform_book_entry(&entry, entries_clone.clone())); } EntryType::Article => strings_output.push(transform_article_entry(&entry)), _ => println!("Entry type not supported: {:?}", entry.entry_type), @@ -22,14 +24,14 @@ pub fn entries_to_strings(entries: Vec) -> Vec { } /// Transform a book entry into a string according to the Chicago bibliography style. -fn transform_book_entry(entry: &Entry) -> String { +fn transform_book_entry(entry: &Entry, entries: Vec) -> String { let mut book_string = String::new(); let author = entry.author().unwrap(); let title = extract_title(entry); let publisher = extract_publisher(entry); let address = extract_address(entry); - let year = extract_date(entry); + let year = extract_date_with_context(entry, entries); let translators = entry.translator().unwrap_or(Vec::new()); let doi = entry.doi().unwrap_or("".to_string()); @@ -129,7 +131,7 @@ fn add_doi(doi: String, target_string: &mut String) { } /// Add year to the target string. -fn add_year(year: i32, target_string: &mut String) { +fn add_year(year: String, target_string: &mut String) { target_string.push_str(&format!("{}. 
", year)); } @@ -239,3 +241,69 @@ fn extract_pages(entry: &Entry) -> String { let pages = BiblatexUtils::extract_pages(&pages_permissive); pages } + +/// Extracts year with disambiguation letter if needed +/// Returns the year as a string with letter suffix (e.g., "1991a", "1991b") for disambiguation +fn extract_date_with_disambiguation(entries: Vec) -> HashMap { + let mut year_map = HashMap::new(); + let mut author_year_counts: HashMap> = HashMap::new(); + + // First pass: group entries by author-year combination + for entry in &entries { + let author = entry.author().unwrap(); + let author_last_name = author[0].name.clone(); + + let date = entry.date().unwrap(); + let year = BiblatexUtils::extract_year_from_date(&date, entry.key.clone()).unwrap(); + + let author_year_key = format!("{}-{}", author_last_name, year); + let entry_key = entry.key.clone(); + + author_year_counts + .entry(author_year_key) + .or_insert_with(Vec::new) + .push(entry_key); + } + + // Second pass: assign disambiguation letters + for entry in &entries { + let author = entry.author().unwrap(); + let author_last_name = author[0].name.clone(); + + let date = entry.date().unwrap(); + let year = BiblatexUtils::extract_year_from_date(&date, entry.key.clone()).unwrap(); + + let author_year_key = format!("{}-{}", author_last_name, year); + let entry_key = entry.key.clone(); + + let entries_for_author_year = author_year_counts.get(&author_year_key).unwrap(); + + let disambiguated_year = if entries_for_author_year.len() > 1 { + // Multiple entries for same author-year, add letter + let position = entries_for_author_year + .iter() + .position(|k| k == &entry_key) + .unwrap(); + let letter = char::from(b'a' + position as u8); + format!("{}{}", year, letter) + } else { + // Only one entry for this author-year, no disambiguation needed + year.to_string() + }; + + year_map.insert(entry_key, disambiguated_year); + } + + year_map +} + +/// Updated extract_date function that uses disambiguation +fn 
extract_date_with_context(entry: &Entry, entries: Vec) -> String { + let disambiguation_map = extract_date_with_disambiguation(entries); + let key = entry.key.clone(); + + disambiguation_map + .get(&key) + .unwrap_or(&"Unknown".to_string()) + .clone() +} From 49c72af7800a4c82fac46a0934e1425554bc0ed9 Mon Sep 17 00:00:00 2001 From: Filip Niklas Date: Tue, 1 Jul 2025 16:12:47 +0200 Subject: [PATCH 14/33] chore: fix articles bibliographical entry and make it use disambiguated author date --- src/inserters.rs | 6 +- src/transformers.rs | 212 ++++++++++++++++++++++++-------------------- src/validators.rs | 42 +++++++-- 3 files changed, 153 insertions(+), 107 deletions(-) diff --git a/src/inserters.rs b/src/inserters.rs index 6bc924b..da3c20e 100644 --- a/src/inserters.rs +++ b/src/inserters.rs @@ -1,4 +1,3 @@ -use biblatex::Entry; use itertools::Itertools; use regex::Regex; use std::collections::BTreeSet; @@ -6,6 +5,7 @@ use std::fs::{self, OpenOptions}; use std::io::{self, Write}; use validators::{ArticleFileData, Metadata}; +use crate::validators::MatchedCitationDisambiguated; use crate::{transformers, validators}; struct InserterOutcome { @@ -114,7 +114,7 @@ pub fn generate_index_to_file( fn process_mdx_file(article_file_data: ArticleFileData, inserter_outcome: &mut InserterOutcome) { let mut mdx_payload = String::new(); - let mdx_bibliography = generate_mdx_bibliography(article_file_data.matched_citations); + let mdx_bibliography = generate_mdx_bibliography(article_file_data.entries_disambiguated); let mdx_authors = generate_mdx_authors(&article_file_data.metadata); let mdx_notes_heading = generate_notes_heading(&article_file_data.markdown_content); @@ -165,7 +165,7 @@ fn append_to_file(path: &str, content: &str) -> std::io::Result<()> { Ok(()) } -fn generate_mdx_bibliography(entries: Vec) -> String { +fn generate_mdx_bibliography(entries: Vec) -> String { let mut bib_html = String::new(); if entries.is_empty() { diff --git a/src/transformers.rs b/src/transformers.rs 
index a0442f7..2fada33 100644 --- a/src/transformers.rs +++ b/src/transformers.rs @@ -1,22 +1,26 @@ use biblatex::{Entry, EntryType}; use std::collections::HashMap; use utils::BiblatexUtils; +use validators::{MatchedCitation, MatchedCitationDisambiguated}; use crate::utils; +use crate::validators; /// Transform a list of entries into a list of strings according to the Chicago bibliography style. -pub fn entries_to_strings(entries: Vec) -> Vec { - let entries_clone = entries.clone(); +pub fn entries_to_strings(entries: Vec) -> Vec { let sorted_entries = sort_entries(entries); let mut strings_output: Vec = Vec::new(); - for entry in sorted_entries { - match entry.entry_type { + for matched_citation in sorted_entries { + match matched_citation.entry.entry_type { EntryType::Book => { - strings_output.push(transform_book_entry(&entry, entries_clone.clone())); + strings_output.push(transform_book_entry(&matched_citation)); } - EntryType::Article => strings_output.push(transform_article_entry(&entry)), - _ => println!("Entry type not supported: {:?}", entry.entry_type), + EntryType::Article => strings_output.push(transform_article_entry(&matched_citation)), + _ => println!( + "Entry type not supported: {:?}", + &matched_citation.entry.entry_type + ), } } @@ -24,19 +28,17 @@ pub fn entries_to_strings(entries: Vec) -> Vec { } /// Transform a book entry into a string according to the Chicago bibliography style. 
-fn transform_book_entry(entry: &Entry, entries: Vec) -> String { +fn transform_book_entry(matched_citation: &MatchedCitationDisambiguated) -> String { let mut book_string = String::new(); - let author = entry.author().unwrap(); - let title = extract_title(entry); - let publisher = extract_publisher(entry); - let address = extract_address(entry); - let year = extract_date_with_context(entry, entries); - let translators = entry.translator().unwrap_or(Vec::new()); - let doi = entry.doi().unwrap_or("".to_string()); + let author = matched_citation.citation_author_date_disambiguated.clone(); + let title = extract_title(&matched_citation.entry); + let publisher = extract_publisher(&matched_citation.entry); + let address = extract_address(&matched_citation.entry); + let translators = matched_citation.entry.translator().unwrap_or(Vec::new()); + let doi = matched_citation.entry.doi().unwrap_or("".to_string()); - add_authors(author, &mut book_string); - add_year(year, &mut book_string); + add_authors_and_date(author, &mut book_string); add_book_title(title, &mut book_string); add_translators(translators, &mut book_string); add_address_and_publisher(address, publisher, &mut book_string); @@ -46,22 +48,21 @@ fn transform_book_entry(entry: &Entry, entries: Vec) -> String { } /// Transform an article entry into a string according to the Chicago bibliography style. 
-fn transform_article_entry(entry: &Entry) -> String { +fn transform_article_entry(matched_citation: &MatchedCitationDisambiguated) -> String { let mut article_string = String::new(); - let author = entry.author().unwrap(); - let title = extract_title(entry); - let journal = extract_journal(entry); - let volume = extract_volume(entry); - let number = extract_number(entry); - let pages = extract_pages(entry); - let year = extract_date(entry); - let translators = entry.translator().unwrap_or(Vec::new()); - let doi = entry.doi().unwrap_or("".to_string()); + let author = matched_citation.citation_author_date_disambiguated.clone(); + let title = extract_title(&matched_citation.entry); + let journal = extract_journal(&matched_citation.entry); + let volume = extract_volume(&matched_citation.entry); + let number = extract_number(&matched_citation.entry); + let pages = extract_pages(&matched_citation.entry); + let translators = matched_citation.entry.translator().unwrap_or(Vec::new()); + let doi = matched_citation.entry.doi().unwrap_or("".to_string()); - add_authors(author, &mut article_string); + add_authors_and_date(author, &mut article_string); add_article_title(title, &mut article_string); - add_journal_volume_number_year_pages(journal, volume, number, year, pages, &mut article_string); + add_journal_volume_number_pages(journal, volume, number, pages, &mut article_string); add_translators(translators, &mut article_string); add_doi(doi, &mut article_string); @@ -94,24 +95,26 @@ fn generate_contributors( contributors_str } -/// Add authors to the target string. Handles the case when there are multiple authors. -fn add_authors(author: Vec, bib_html: &mut String) { +/// Add authors and date as is. +fn add_authors_and_date(author_and_date: String, bib_html: &mut String) { + bib_html.push_str(&format!("{}", author_and_date)); +} + +/// Returns Chicago style format for authors. Handles the case when there are multiple authors. 
+fn format_authors(author: Vec) -> String { if author.len() > 2 { - bib_html.push_str(&format!( - "{}, {} et al. ", - author[0].name, author[0].given_name - )); + return format!("{}, {} et al. ", author[0].name, author[0].given_name); } else if author.len() == 2 { // In Chicago style, when listing multiple authors in a bibliography entry, // only the first author's name is inverted (i.e., "Last, First"). The second and subsequent // authors' names are written in standard order (i.e., "First Last"). // This rule helps differentiate the primary author from co-authors. - bib_html.push_str(&format!( + return format!( "{}, {} and {} {}. ", author[0].name, author[0].given_name, author[1].given_name, author[1].name - )); + ); } else { - bib_html.push_str(&format!("{}, {}. ", author[0].name, author[0].given_name)); + return format!("{}, {}. ", author[0].name, author[0].given_name); } } @@ -130,11 +133,6 @@ fn add_doi(doi: String, target_string: &mut String) { } } -/// Add year to the target string. -fn add_year(year: String, target_string: &mut String) { - target_string.push_str(&format!("{}. ", year)); -} - /// Add book title to the target string. Mainly used for books. fn add_book_title(title: String, target_string: &mut String) { target_string.push_str(&format!("_{}_. ", title)); @@ -151,26 +149,25 @@ fn add_address_and_publisher(address: String, publisher: String, target_string: } /// Add journal, volume, number, year, and pages to the target string. Mainly used for articles. -fn add_journal_volume_number_year_pages( +fn add_journal_volume_number_pages( journal: String, volume: i64, number: String, - year: i32, pages: String, target_string: &mut String, ) { target_string.push_str(&format!( - "_{}_ {}, no. {} ({}): {}. ", - journal, volume, number, year, pages + "_{}_ {} ({}): {}. ", + journal, volume, number, pages )); } /// Sort entries by author's last name. 
-fn sort_entries(entries: Vec) -> Vec { +fn sort_entries(entries: Vec) -> Vec { let mut sorted_entries = entries.clone(); sorted_entries.sort_by(|a, b| { - let a_authors = a.author().unwrap_or_default(); - let b_authors = b.author().unwrap_or_default(); + let a_authors = a.entry.author().unwrap_or_default(); + let b_authors = b.entry.author().unwrap_or_default(); let a_last_name = a_authors .first() @@ -242,68 +239,89 @@ fn extract_pages(entry: &Entry) -> String { pages } -/// Extracts year with disambiguation letter if needed -/// Returns the year as a string with letter suffix (e.g., "1991a", "1991b") for disambiguation -fn extract_date_with_disambiguation(entries: Vec) -> HashMap { - let mut year_map = HashMap::new(); - let mut author_year_counts: HashMap> = HashMap::new(); +/// Transform MatchedCitation vector into MatchedCitationDisambiguated vector +/// This handles all disambiguation logic in one place +pub fn disambiguate_matched_citations( + citations: Vec, +) -> Vec { + // Group citations by author-year for disambiguation analysis + let mut author_year_groups: HashMap> = HashMap::new(); - // First pass: group entries by author-year combination - for entry in &entries { - let author = entry.author().unwrap(); + for citation in &citations { + let author = citation.entry.author().unwrap(); let author_last_name = author[0].name.clone(); - let date = entry.date().unwrap(); - let year = BiblatexUtils::extract_year_from_date(&date, entry.key.clone()).unwrap(); + let date = citation.entry.date().unwrap(); + let year = + BiblatexUtils::extract_year_from_date(&date, citation.entry.key.clone()).unwrap(); let author_year_key = format!("{}-{}", author_last_name, year); - let entry_key = entry.key.clone(); - - author_year_counts + author_year_groups .entry(author_year_key) .or_insert_with(Vec::new) - .push(entry_key); + .push(citation); } - // Second pass: assign disambiguation letters - for entry in &entries { - let author = entry.author().unwrap(); - let 
author_last_name = author[0].name.clone(); + // Create disambiguation mapping + let mut citation_to_disambiguated: HashMap = HashMap::new(); - let date = entry.date().unwrap(); - let year = BiblatexUtils::extract_year_from_date(&date, entry.key.clone()).unwrap(); + for (_author_year_key, group_citations) in author_year_groups { + if group_citations.len() > 1 { + // Need disambiguation - sort by entry key for consistent ordering + let mut sorted_citations = group_citations; + sorted_citations.sort_by(|a, b| a.entry.key.cmp(&b.entry.key)); - let author_year_key = format!("{}-{}", author_last_name, year); - let entry_key = entry.key.clone(); - - let entries_for_author_year = author_year_counts.get(&author_year_key).unwrap(); - - let disambiguated_year = if entries_for_author_year.len() > 1 { - // Multiple entries for same author-year, add letter - let position = entries_for_author_year - .iter() - .position(|k| k == &entry_key) - .unwrap(); - let letter = char::from(b'a' + position as u8); - format!("{}{}", year, letter) + for (index, citation) in sorted_citations.iter().enumerate() { + let letter = char::from(b'a' + index as u8); + let disambiguated = create_disambiguated_citation(letter, &citation.entry); + citation_to_disambiguated.insert(citation.citation_raw.clone(), disambiguated); + } } else { - // Only one entry for this author-year, no disambiguation needed - year.to_string() - }; - - year_map.insert(entry_key, disambiguated_year); + // No disambiguation needed - convert to standard format + let citation = group_citations[0]; + let standard = create_standard_citation(&citation.citation_raw, &citation.entry); + citation_to_disambiguated.insert(citation.citation_raw.clone(), standard); + } } - year_map + // Transform all citations using the disambiguation map + citations + .into_iter() + .map(|matched_citation| { + let disambiguated = citation_to_disambiguated + .get(&matched_citation.citation_raw) + .cloned() + .unwrap_or_else(|| 
matched_citation.citation_raw.clone()); // Fallback + + MatchedCitationDisambiguated { + citation_raw: matched_citation.citation_raw, + citation_author_date_disambiguated: disambiguated, + entry: matched_citation.entry, + } + }) + .collect() } -/// Updated extract_date function that uses disambiguation -fn extract_date_with_context(entry: &Entry, entries: Vec) -> String { - let disambiguation_map = extract_date_with_disambiguation(entries); - let key = entry.key.clone(); +/// Create disambiguated citation with letter (e.g., "@hegel2020logic, 123" -> "Hegel 2020a") +fn create_disambiguated_citation(letter: char, entry: &Entry) -> String { + let author = format_authors(entry.author().unwrap()); + let year = extract_date(entry); + format!("{} {}{}", author, year, letter) +} - disambiguation_map - .get(&key) - .unwrap_or(&"Unknown".to_string()) - .clone() +/// Create standard citation format (no disambiguation needed) +fn create_standard_citation(raw_citation: &str, entry: &Entry) -> String { + if raw_citation.starts_with('@') { + // Convert @key to Author Year format + let author = entry.author().unwrap(); + let author_last_name = author[0].name.clone(); + + let date = entry.date().unwrap(); + let year = BiblatexUtils::extract_year_from_date(&date, entry.key.clone()).unwrap(); + + format!("{} {}", author_last_name, year) + } else { + // Already in standard format, return as-is + raw_citation.to_string() + } } diff --git a/src/validators.rs b/src/validators.rs index 189f865..ddb0196 100644 --- a/src/validators.rs +++ b/src/validators.rs @@ -1,4 +1,4 @@ -use crate::BiblatexUtils; +use crate::{transformers, BiblatexUtils}; use biblatex::Entry; use regex::Regex; use serde::Deserialize; @@ -40,7 +40,7 @@ pub struct ArticleFileData { /// Contents of the file. pub markdown_content: String, /// A set of citations that exist in the source `.bib` file. - pub matched_citations: Vec, + pub entries_disambiguated: Vec, /// Original contents of the file, includes metadata. 
pub full_file_content: String, } @@ -54,11 +54,32 @@ pub struct ArticleFileDataUnverified { /// Contents of the file. pub markdown_content: String, /// A set of citations that exist in the source `.bib` file. - pub matched_citations: Vec, + pub entries_disambiguated: Vec, /// Original contents of the file, includes metadata. pub full_file_content: String, } +#[derive(Debug, Clone)] +pub struct MatchedCitation { + /// Original citation. E.g., "@hegel2020logic, 123" or "Hegel 2020, 123" + pub citation_raw: String, + /// bilblatex bibliographical Entry + pub entry: Entry, +} + +#[derive(Debug, Clone)] +pub struct MatchedCitationDisambiguated { + /// Original citation. E.g., "@hegel2020logic, 123" or "Hegel 2020, 123" + pub citation_raw: String, + /// Context aware citation that should include disambiguitation if needed. + /// E.g. "Hegel 2020a" "Hegel 2020b" + pub citation_author_date_disambiguated: String, + /// bilblatex bibliographical Entry + pub entry: Entry, +} + +// TODO program should throw if it finds multiple entries under the same author year, requesting disambiguiation by key + impl TryFrom for ArticleFileData { type Error = Box; @@ -82,7 +103,7 @@ impl TryFrom for ArticleFileData { contributors: article.metadata.contributors, }, markdown_content: article.markdown_content, - matched_citations: article.matched_citations, + entries_disambiguated: article.entries_disambiguated, full_file_content: article.full_file_content, }) } @@ -140,11 +161,15 @@ pub fn verify_mdx_files( std::process::exit(1); } }; + + let disambiguated_matched_citations = + transformers::disambiguate_matched_citations(matched_citations); + let article = ArticleFileDataUnverified { path: mdx_path.clone(), metadata, markdown_content, - matched_citations, + entries_disambiguated: disambiguated_matched_citations, full_file_content, }; @@ -315,7 +340,7 @@ fn create_citations_set(citations: Vec) -> Vec { fn match_citations_to_bibliography( citations: Vec, bibliography: &Vec, -) -> Result, 
io::Error> { +) -> Result, io::Error> { let mut unmatched_citations = citations.clone(); let mut matched_citations = Vec::new(); @@ -346,7 +371,10 @@ fn match_citations_to_bibliography( if is_match { unmatched_citations.retain(|x| x != &citation); - matched_citations.push(entry.clone()); + matched_citations.push(MatchedCitation { + citation_raw: citation.clone(), + entry: entry.clone(), + }); break; // Move to next citation once we find a match } } From 7ca646fb244e3f190a7919b978e8c88c119e89ab Mon Sep 17 00:00:00 2001 From: Filip Niklas Date: Tue, 1 Jul 2025 16:22:18 +0200 Subject: [PATCH 15/33] chore: period and space after author date in bibliography --- src/transformers.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers.rs b/src/transformers.rs index 2fada33..a1a97a1 100644 --- a/src/transformers.rs +++ b/src/transformers.rs @@ -97,7 +97,7 @@ fn generate_contributors( /// Add authors and date as is. fn add_authors_and_date(author_and_date: String, bib_html: &mut String) { - bib_html.push_str(&format!("{}", author_and_date)); + bib_html.push_str(&format!("{}. ", author_and_date)); } /// Returns Chicago style format for authors. Handles the case when there are multiple authors. 
From 1ae4e2ae589bd89b9475ff552532863bbb20264a Mon Sep 17 00:00:00 2001 From: Filip Niklas Date: Tue, 1 Jul 2025 16:43:17 +0200 Subject: [PATCH 16/33] fix: handle proper author and year disambiguation in biblio gen --- src/transformers.rs | 36 +++++++++++++++++++++++++++++------- src/validators.rs | 6 ++++++ 2 files changed, 35 insertions(+), 7 deletions(-) diff --git a/src/transformers.rs b/src/transformers.rs index a1a97a1..4cb3c10 100644 --- a/src/transformers.rs +++ b/src/transformers.rs @@ -31,14 +31,16 @@ pub fn entries_to_strings(entries: Vec) -> Vec String { let mut book_string = String::new(); - let author = matched_citation.citation_author_date_disambiguated.clone(); + let author = matched_citation.entry.author().unwrap(); + let year = matched_citation.year_disambiguated.clone(); let title = extract_title(&matched_citation.entry); let publisher = extract_publisher(&matched_citation.entry); let address = extract_address(&matched_citation.entry); let translators = matched_citation.entry.translator().unwrap_or(Vec::new()); let doi = matched_citation.entry.doi().unwrap_or("".to_string()); - add_authors_and_date(author, &mut book_string); + add_authors(author, &mut book_string); + add_year(year, &mut book_string); add_book_title(title, &mut book_string); add_translators(translators, &mut book_string); add_address_and_publisher(address, publisher, &mut book_string); @@ -51,7 +53,8 @@ fn transform_book_entry(matched_citation: &MatchedCitationDisambiguated) -> Stri fn transform_article_entry(matched_citation: &MatchedCitationDisambiguated) -> String { let mut article_string = String::new(); - let author = matched_citation.citation_author_date_disambiguated.clone(); + let author = matched_citation.entry.author().unwrap(); + let year = matched_citation.year_disambiguated.clone(); let title = extract_title(&matched_citation.entry); let journal = extract_journal(&matched_citation.entry); let volume = extract_volume(&matched_citation.entry); @@ -60,7 +63,8 @@ fn 
transform_article_entry(matched_citation: &MatchedCitationDisambiguated) -> S let translators = matched_citation.entry.translator().unwrap_or(Vec::new()); let doi = matched_citation.entry.doi().unwrap_or("".to_string()); - add_authors_and_date(author, &mut article_string); + add_authors(author, &mut article_string); + add_year(year, &mut article_string); add_article_title(title, &mut article_string); add_journal_volume_number_pages(journal, volume, number, pages, &mut article_string); add_translators(translators, &mut article_string); @@ -95,9 +99,13 @@ fn generate_contributors( contributors_str } -/// Add authors and date as is. -fn add_authors_and_date(author_and_date: String, bib_html: &mut String) { - bib_html.push_str(&format!("{}. ", author_and_date)); +fn add_year(year: String, target_string: &mut String) { + target_string.push_str(&format!("{}. ", year)); +} + +// Adds author(s). Handles multiple. +fn add_authors(author: Vec, bib_html: &mut String) { + bib_html.push_str(&format_authors(author)) } /// Returns Chicago style format for authors. Handles the case when there are multiple authors. 
@@ -264,6 +272,7 @@ pub fn disambiguate_matched_citations( // Create disambiguation mapping let mut citation_to_disambiguated: HashMap = HashMap::new(); + let mut year_to_disambiguated: HashMap = HashMap::new(); for (_author_year_key, group_citations) in author_year_groups { if group_citations.len() > 1 { @@ -275,6 +284,8 @@ pub fn disambiguate_matched_citations( let letter = char::from(b'a' + index as u8); let disambiguated = create_disambiguated_citation(letter, &citation.entry); citation_to_disambiguated.insert(citation.citation_raw.clone(), disambiguated); + let disambiguated_year = create_disambiguated_year(letter, &citation.entry); + year_to_disambiguated.insert(citation.citation_raw.clone(), disambiguated_year); } } else { // No disambiguation needed - convert to standard format @@ -293,9 +304,15 @@ pub fn disambiguate_matched_citations( .cloned() .unwrap_or_else(|| matched_citation.citation_raw.clone()); // Fallback + let disambiguated_year = year_to_disambiguated + .get(&matched_citation.citation_raw) + .cloned() + .unwrap_or_else(|| extract_date(&matched_citation.entry).to_string()); + MatchedCitationDisambiguated { citation_raw: matched_citation.citation_raw, citation_author_date_disambiguated: disambiguated, + year_disambiguated: disambiguated_year, entry: matched_citation.entry, } }) @@ -309,6 +326,11 @@ fn create_disambiguated_citation(letter: char, entry: &Entry) -> String { format!("{} {}{}", author, year, letter) } +fn create_disambiguated_year(letter: char, entry: &Entry) -> String { + let year = extract_date(entry); + format!("{}{}", year, letter) +} + /// Create standard citation format (no disambiguation needed) fn create_standard_citation(raw_citation: &str, entry: &Entry) -> String { if raw_citation.starts_with('@') { diff --git a/src/validators.rs b/src/validators.rs index ddb0196..dc058c1 100644 --- a/src/validators.rs +++ b/src/validators.rs @@ -71,9 +71,15 @@ pub struct MatchedCitation { pub struct MatchedCitationDisambiguated { /// 
Original citation. E.g., "@hegel2020logic, 123" or "Hegel 2020, 123" pub citation_raw: String, + /// Context aware citation that should include disambiguitation if needed. /// E.g. "Hegel 2020a" "Hegel 2020b" pub citation_author_date_disambiguated: String, + + /// Context aware year that should include disambiguitation if needed. + /// E.g. "2020a" "2020b" + pub year_disambiguated: String, + /// bilblatex bibliographical Entry pub entry: Entry, } From e5d6f7b7d0dccdac6e5341660bfc7e0dccf0426a Mon Sep 17 00:00:00 2001 From: Filip Niklas Date: Tue, 1 Jul 2025 17:17:56 +0200 Subject: [PATCH 17/33] feat: transform keys to citations in article --- src/inserters.rs | 10 ++++++---- src/transformers.rs | 20 ++++++++++++++++++-- src/validators.rs | 4 ++-- 3 files changed, 26 insertions(+), 8 deletions(-) diff --git a/src/inserters.rs b/src/inserters.rs index da3c20e..b5a949d 100644 --- a/src/inserters.rs +++ b/src/inserters.rs @@ -5,6 +5,7 @@ use std::fs::{self, OpenOptions}; use std::io::{self, Write}; use validators::{ArticleFileData, Metadata}; +use crate::transformers::transform_keys_to_citations; use crate::validators::MatchedCitationDisambiguated; use crate::{transformers, validators}; @@ -114,7 +115,7 @@ pub fn generate_index_to_file( fn process_mdx_file(article_file_data: ArticleFileData, inserter_outcome: &mut InserterOutcome) { let mut mdx_payload = String::new(); - let mdx_bibliography = generate_mdx_bibliography(article_file_data.entries_disambiguated); + let mdx_bibliography = generate_mdx_bibliography(&article_file_data.entries_disambiguated); let mdx_authors = generate_mdx_authors(&article_file_data.metadata); let mdx_notes_heading = generate_notes_heading(&article_file_data.markdown_content); @@ -136,8 +137,9 @@ fn process_mdx_file(article_file_data: ArticleFileData, inserter_outcome: &mut I return; } - let updated_markdown_content = - format!("{}\n{}", article_file_data.full_file_content, mdx_payload); + let full_file_content_disambiguated = 
transform_keys_to_citations(&article_file_data); + + let updated_markdown_content = format!("{}\n{}", full_file_content_disambiguated, mdx_payload); match write_html_to_mdx_file(&article_file_data.path, &updated_markdown_content) { Ok(_) => { @@ -165,7 +167,7 @@ fn append_to_file(path: &str, content: &str) -> std::io::Result<()> { Ok(()) } -fn generate_mdx_bibliography(entries: Vec) -> String { +fn generate_mdx_bibliography(entries: &Vec) -> String { let mut bib_html = String::new(); if entries.is_empty() { diff --git a/src/transformers.rs b/src/transformers.rs index 4cb3c10..31b197e 100644 --- a/src/transformers.rs +++ b/src/transformers.rs @@ -5,9 +5,10 @@ use validators::{MatchedCitation, MatchedCitationDisambiguated}; use crate::utils; use crate::validators; +use crate::validators::ArticleFileData; /// Transform a list of entries into a list of strings according to the Chicago bibliography style. -pub fn entries_to_strings(entries: Vec) -> Vec { +pub fn entries_to_strings(entries: &Vec) -> Vec { let sorted_entries = sort_entries(entries); let mut strings_output: Vec = Vec::new(); @@ -27,6 +28,21 @@ pub fn entries_to_strings(entries: Vec) -> Vec String { + let mut full_content = article_file_data.full_file_content.clone(); + + for matched_citation in &article_file_data.entries_disambiguated { + if matched_citation.citation_raw.starts_with('@') { + full_content = full_content.replace( + &matched_citation.citation_raw, + &matched_citation.citation_author_date_disambiguated, + ); + } + } + + full_content +} + /// Transform a book entry into a string according to the Chicago bibliography style. fn transform_book_entry(matched_citation: &MatchedCitationDisambiguated) -> String { let mut book_string = String::new(); @@ -171,7 +187,7 @@ fn add_journal_volume_number_pages( } /// Sort entries by author's last name. 
-fn sort_entries(entries: Vec) -> Vec { +fn sort_entries(entries: &Vec) -> Vec { let mut sorted_entries = entries.clone(); sorted_entries.sort_by(|a, b| { let a_authors = a.entry.author().unwrap_or_default(); diff --git a/src/validators.rs b/src/validators.rs index dc058c1..0d3d972 100644 --- a/src/validators.rs +++ b/src/validators.rs @@ -39,7 +39,7 @@ pub struct ArticleFileData { pub metadata: Metadata, /// Contents of the file. pub markdown_content: String, - /// A set of citations that exist in the source `.bib` file. + /// A set of citations that exist in the source `.bib` file with disambiguated author date and date. pub entries_disambiguated: Vec, /// Original contents of the file, includes metadata. pub full_file_content: String, @@ -53,7 +53,7 @@ pub struct ArticleFileDataUnverified { pub metadata: MetadataUnverified, /// Contents of the file. pub markdown_content: String, - /// A set of citations that exist in the source `.bib` file. + /// A set of citations that exist in the source `.bib` file with disambiguated author date and date. pub entries_disambiguated: Vec, /// Original contents of the file, includes metadata. pub full_file_content: String, From e4df230cde0fe7d228a85da9710fbdc8933fbc3c Mon Sep 17 00:00:00 2001 From: Filip Niklas Date: Tue, 1 Jul 2025 17:23:49 +0200 Subject: [PATCH 18/33] fix: format authors last name only for disambiguated author date --- src/transformers.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/transformers.rs b/src/transformers.rs index 31b197e..8321e44 100644 --- a/src/transformers.rs +++ b/src/transformers.rs @@ -142,6 +142,17 @@ fn format_authors(author: Vec) -> String { } } +/// Returns Chicago style format for authors. Handles the case when there are multiple authors. 
+fn format_authors_last_name_only(author: Vec) -> String { + if author.len() > 2 { + return format!("{} et al.", author[0].name); + } else if author.len() == 2 { + return format!("{} and {}", author[0].name, author[1].name); + } else { + return format!("{}", author[0].name); + } +} + /// Add translators to the target string if they exist. fn add_translators(translators: Vec, target_string: &mut String) { let translators_mdx = generate_contributors(translators, "Translated".to_string()); @@ -337,7 +348,7 @@ pub fn disambiguate_matched_citations( /// Create disambiguated citation with letter (e.g., "@hegel2020logic, 123" -> "Hegel 2020a") fn create_disambiguated_citation(letter: char, entry: &Entry) -> String { - let author = format_authors(entry.author().unwrap()); + let author = format_authors_last_name_only(entry.author().unwrap()); let year = extract_date(entry); format!("{} {}{}", author, year, letter) } From 1eaf6020e653dd8778e74d30361882056be300f7 Mon Sep 17 00:00:00 2001 From: Filip Niklas Date: Tue, 1 Jul 2025 17:34:39 +0200 Subject: [PATCH 19/33] test: add integration test cases for disambiguation --- tests/mocks/data/development.mdx | 18 ++++++++++-- tests/mocks/test.bib | 50 ++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 2 deletions(-) diff --git a/tests/mocks/data/development.mdx b/tests/mocks/data/development.mdx index 1323e88..408e34e 100644 --- a/tests/mocks/data/development.mdx +++ b/tests/mocks/data/development.mdx @@ -157,6 +157,8 @@ bay their determination of the matter at hand and allow it to be understood freely on its own terms, which in the case of `being` means its simplicity and purity (Houlgate 2022, 137). +Should be a quote for Doe 2021 Smart Cities (@doe2021b, 22). + > To think of being in this way, we must first abstract from all we ordinarily > take being to be; but we must then abstract from, and set aside, the very fact > that pure being is the result of abstraction. 
Only thus will the process of @@ -165,6 +167,8 @@ purity (Houlgate 2022, 137). ### McTaggart +Should be a quote for Doe 2021 Citizen Engagement (@doe2021e, 59). + John McTaggart looks at Hegel's opening category not so much an affirmation of being as an affirmation of nothing else. He further considers that being has no nature, since any nature would indicate some kind of determinacy vis-à-vis @@ -173,6 +177,16 @@ case. "Any determination would give it some particular nature, as against some other particular nature—would make it _X_ rather than _not-X_. It has therefore no determination whatever" (McTaggart 1910, 15). -Some quote (@hegel1991logic, 15). +Some quote for Miller logic (@hegel1991logic, 15). + +Some other quote for Encyclopaedia Logic (@hegel1991encyclopaedialogic, 228). + +Should be a quote for Doe 2021 Urban Design (@doe2021a, 123). + +Should be a quote for Doe 2021 Smart Cities (@doe2021b, 22). + +Should be a quote for Doe and Smith 2021 Ethical Frameworks (@doe2021c, 411). + +Should be a quote for Doe 2021 Resilience Strategies (@doe2021d, 88). -Some other quite (@hegel1991encyclopaedialogic, 228). +Should be a quote for Doe 2021 Citizen Engagement (@doe2021e, 59). 
diff --git a/tests/mocks/test.bib b/tests/mocks/test.bib index 3a052ba..148c023 100644 --- a/tests/mocks/test.bib +++ b/tests/mocks/test.bib @@ -114,4 +114,54 @@ @article{James_Knappik_2024 translator = {Guyer, Paul and Wood, Allen W.}, year = {2024}, pages = {181–184} +} + +@article{doe2021a, + author = {Doe, Jane}, + year = {2021}, + title = {Urban Design in the Post-Pandemic World}, + journal = {Journal of Urban Studies}, + volume = {45}, + number = {2}, + pages = {123--140} +} + +@article{doe2021b, + author = {Doe, Jane}, + year = {2021}, + title = {Smart Cities and Privacy: A Critical Perspective}, + journal = {Technology and Society}, + volume = {18}, + number = {1}, + pages = {22--35} +} + +@article{doe2021c, + author = {Doe, Jane and Smith, Robert}, + year = {2021}, + title = {Ethical Frameworks for AI in Urban Planning}, + journal = {AI & Society}, + volume = {36}, + number = {4}, + pages = {411--429} +} + +@article{doe2021d, + author = {Doe, Jane}, + year = {2021}, + title = {Resilience Strategies in Coastal Cities}, + journal = {Environmental Planning Review}, + volume = {12}, + number = {3}, + pages = {88--102} +} + +@article{doe2021e, + author = {Doe, Jane}, + year = {2021}, + title = {Citizen Engagement through Digital Platforms}, + journal = {Public Policy and Administration}, + volume = {29}, + number = {2}, + pages = {59--74} } \ No newline at end of file From b24c2e518eccb595beb433fe51220a215ce2eaf9 Mon Sep 17 00:00:00 2001 From: Filip Niklas Date: Tue, 1 Jul 2025 17:49:11 +0200 Subject: [PATCH 20/33] fix: sorting logic to be disambiguation aware --- src/transformers.rs | 41 ++++++++++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/src/transformers.rs b/src/transformers.rs index 8321e44..d13475d 100644 --- a/src/transformers.rs +++ b/src/transformers.rs @@ -1,3 +1,4 @@ +use biblatex::Person; use biblatex::{Entry, EntryType}; use std::collections::HashMap; use utils::BiblatexUtils; @@ -200,24 +201,46 @@ 
fn add_journal_volume_number_pages( /// Sort entries by author's last name. fn sort_entries(entries: &Vec) -> Vec { let mut sorted_entries = entries.clone(); + sorted_entries.sort_by(|a, b| { let a_authors = a.entry.author().unwrap_or_default(); let b_authors = b.entry.author().unwrap_or_default(); - let a_last_name = a_authors - .first() - .map(|p| p.name.clone().to_lowercase()) - .unwrap_or_default(); - let b_last_name = b_authors - .first() - .map(|p| p.name.clone().to_lowercase()) - .unwrap_or_default(); + // Get author names for comparison + let a_author_key = author_key(&a_authors); + let b_author_key = author_key(&b_authors); + + // Compare by author(s) + let cmp_author = a_author_key.cmp(&b_author_key); + if cmp_author != std::cmp::Ordering::Equal { + return cmp_author; + } + + // Compare by year + let a_year = &a.year_disambiguated; + let b_year = &b.year_disambiguated; + let cmp_year = a_year.cmp(&b_year); + if cmp_year != std::cmp::Ordering::Equal { + return cmp_year; + } - a_last_name.cmp(&b_last_name) + // Compare by title (for disambiguation) + let a_title = extract_title(&a.entry).to_lowercase(); + let b_title = extract_title(&b.entry).to_lowercase(); + a_title.cmp(&b_title) }); + sorted_entries } +/// Helper to generate a sortable author string +fn author_key(authors: &Vec) -> String { + authors + .first() + .map(|p| p.name.clone().to_lowercase()) + .unwrap_or_default() +} + /// Title of the entry. 
fn extract_title(entry: &Entry) -> String { let title_spanned = entry.title().unwrap(); From 22ce315aece8bdd316442a35b4fc37add8881301 Mon Sep 17 00:00:00 2001 From: Filip Niklas Date: Tue, 1 Jul 2025 22:45:02 +0200 Subject: [PATCH 21/33] chore: todos --- src/validators.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/validators.rs b/src/validators.rs index 0d3d972..375b579 100644 --- a/src/validators.rs +++ b/src/validators.rs @@ -85,6 +85,8 @@ pub struct MatchedCitationDisambiguated { } // TODO program should throw if it finds multiple entries under the same author year, requesting disambiguiation by key +// TODO in that case program should offer alternatives by key +// TODO build test suite for missing keys when keys are used impl TryFrom for ArticleFileData { type Error = Box; From 564d8e4fd67dcec898b5ddb31dd9ee7b7dbad917 Mon Sep 17 00:00:00 2001 From: Filip Niklas Date: Tue, 1 Jul 2025 23:35:07 +0200 Subject: [PATCH 22/33] feat: check for ambiguous citations and add custom errors --- src/errors.rs | 26 +++++++++++++++++++++++ src/lib.rs | 1 + src/validators.rs | 54 +++++++++++++++++++++++++++++++++++++++-------- 3 files changed, 72 insertions(+), 9 deletions(-) create mode 100644 src/errors.rs diff --git a/src/errors.rs b/src/errors.rs new file mode 100644 index 0000000..5e4d041 --- /dev/null +++ b/src/errors.rs @@ -0,0 +1,26 @@ +use std::fmt; + +/// Validation errors when parsing contents of a file. +#[derive(Debug)] +pub enum CitationError { + /// Two or more possible matches to a single citation. Requires disambiguation through unique key rather than inline citation style. + AmbiguousMatch(String), + + /// Citations that did not find a match in the source `.bib` bibliography. 
+ UnmatchedCitations(Vec), +} + +impl fmt::Display for CitationError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + CitationError::AmbiguousMatch(details) => { + write!(f, "Ambiguous citation match:\n{}", details) + } + CitationError::UnmatchedCitations(citations) => { + write!(f, "Citations not found in the library: {:?}", citations) + } + } + } +} + +impl std::error::Error for CitationError {} diff --git a/src/lib.rs b/src/lib.rs index 6595a2b..6a3d7fd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -101,6 +101,7 @@ Apache-2.0 */ pub mod cli; +pub mod errors; pub mod inserters; pub mod transformers; pub mod utils; diff --git a/src/validators.rs b/src/validators.rs index 375b579..a02e8b6 100644 --- a/src/validators.rs +++ b/src/validators.rs @@ -1,7 +1,9 @@ +use crate::errors::CitationError; use crate::{transformers, BiblatexUtils}; use biblatex::Entry; use regex::Regex; use serde::Deserialize; +use std::collections::HashMap; use std::fs; use std::io::{self, BufReader, Error, Read}; @@ -348,7 +350,7 @@ fn create_citations_set(citations: Vec) -> Vec { fn match_citations_to_bibliography( citations: Vec, bibliography: &Vec, -) -> Result, io::Error> { +) -> Result, CitationError> { let mut unmatched_citations = citations.clone(); let mut matched_citations = Vec::new(); @@ -383,24 +385,58 @@ fn match_citations_to_bibliography( citation_raw: citation.clone(), entry: entry.clone(), }); - break; // Move to next citation once we find a match } } } if unmatched_citations.len() > 0 { - return Err(io::Error::new( - io::ErrorKind::InvalidData, - format!( - "Citations not found in the library: ({:?})", - unmatched_citations - ), - )); + return Err(CitationError::UnmatchedCitations(unmatched_citations)); } + check_for_ambiguous_citations(&matched_citations)?; + Ok(matched_citations) } +fn check_for_ambiguous_citations( + matched_citations: &Vec, +) -> Result<(), CitationError> { + let mut citation_map: HashMap> = HashMap::new(); + + for matched in 
matched_citations { + citation_map + .entry(matched.citation_raw.clone()) + .or_default() + .push(matched.clone()); + } + + let mut ambiguous_citations = Vec::new(); + + for (citation_raw, matches) in citation_map.iter() { + if matches.len() > 1 { + ambiguous_citations.push((citation_raw.clone(), matches)); + } + } + + if !ambiguous_citations.is_empty() { + let mut error_msg = String::from("Ambiguous citations found:\n"); + for (citation, entries) in ambiguous_citations { + let entry_keys: Vec = entries + .iter() + .map(|m| format!("key: {}", m.entry.key)) + .collect(); + error_msg.push_str(&format!( + "- '{}' might refer to multiple entries: {}\n", + citation, + entry_keys.join(", ") + )); + } + return Err(CitationError::AmbiguousMatch(error_msg)); + } + + Ok(()) +} + #[cfg(test)] mod tests_balanced_parentheses { use super::*; From 67ffa95db5b02f639de275550936705df790d3b0 Mon Sep 17 00:00:00 2001 From: Filip Niklas Date: Wed, 2 Jul 2025 00:08:40 +0200 Subject: [PATCH 23/33] feat: add tests for ambiguous citations --- src/errors.rs | 2 +- src/utils.rs | 4 ++-- src/validators.rs | 58 +++++++++++++++++++++++++++++++++++------------ 3 files changed, 47 insertions(+), 17 deletions(-) diff --git a/src/errors.rs b/src/errors.rs index 5e4d041..8317671 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -14,7 +14,7 @@ impl fmt::Display for CitationError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { CitationError::AmbiguousMatch(details) => { - write!(f, "Ambiguous citation match:\n{}", details) + write!(f, "Ambiguous citations found:\n{}", details) } CitationError::UnmatchedCitations(citations) => { write!(f, "Citations not found in the library: {:?}", citations) diff --git a/src/utils.rs b/src/utils.rs index a419501..749f92c 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -9,10 +9,10 @@ use std::{ use crate::cli::{Cli, Mode}; -/// Utility functions for working with BibTeX files. +/// Utility functions for prepyrus for working with BibTeX files. 
pub struct BiblatexUtils; -/// Utility functions for working with files and directories. +/// Utility functions for prepyrus working with files and directories. pub struct Utils; #[derive(Debug)] diff --git a/src/validators.rs b/src/validators.rs index a02e8b6..f2e1476 100644 --- a/src/validators.rs +++ b/src/validators.rs @@ -33,42 +33,56 @@ pub struct MetadataUnverified { pub contributors: Option, } +/// Contents of an article file as well as path and matched entries. #[derive(Debug, Clone)] pub struct ArticleFileData { /// Path to the file whose contents were extracted. pub path: String, + /// Metadata enclosed at the top of the file. pub metadata: Metadata, + /// Contents of the file. pub markdown_content: String, + /// A set of citations that exist in the source `.bib` file with disambiguated author date and date. pub entries_disambiguated: Vec, + /// Original contents of the file, includes metadata. pub full_file_content: String, } +/// Contents of an article file as well as path and matched entries, but where some fields are unverified. #[derive(Debug, Clone)] pub struct ArticleFileDataUnverified { /// Path to the file whose contents were extracted. pub path: String, + /// Metadata (unverified) enclosed at the top of the file. pub metadata: MetadataUnverified, + /// Contents of the file. pub markdown_content: String, + /// A set of citations that exist in the source `.bib` file with disambiguated author date and date. pub entries_disambiguated: Vec, + /// Original contents of the file, includes metadata. pub full_file_content: String, } +/// Citation from an article that has found a match in the source bibliography. #[derive(Debug, Clone)] pub struct MatchedCitation { /// Original citation. 
E.g., "@hegel2020logic, 123" or "Hegel 2020, 123" pub citation_raw: String, + /// bilblatex bibliographical Entry pub entry: Entry, } +/// Citation from an article that has found a match in the source bibliography that also +/// includes disambiguated author date `String` and year `String`. #[derive(Debug, Clone)] pub struct MatchedCitationDisambiguated { /// Original citation. E.g., "@hegel2020logic, 123" or "Hegel 2020, 123" @@ -86,8 +100,6 @@ pub struct MatchedCitationDisambiguated { pub entry: Entry, } -// TODO program should throw if it finds multiple entries under the same author year, requesting disambiguiation by key -// TODO in that case program should offer alternatives by key // TODO build test suite for missing keys when keys are used impl TryFrom for ArticleFileData { @@ -318,6 +330,7 @@ fn verify_citations_format(citations: &Vec) -> Result<(), io::Error> { false } }); + if !has_year { return Err(io::Error::new( io::ErrorKind::InvalidData, @@ -419,7 +432,7 @@ fn check_for_ambiguous_citations( } if !ambiguous_citations.is_empty() { - let mut error_msg = String::from("Ambiguous citations found:\n"); + let mut error_msg = String::from(""); for (citation, entries) in ambiguous_citations { let entry_keys: Vec = entries .iter() @@ -619,15 +632,32 @@ mod tests_validate_citations { assert_eq!(citations_set, vec!["Hegel 2021", "Kant"]); } - // TODO what happened here? 
investigate - // #[test] - // fn test_match_citations_to_bibliography() { - // let bibliography = vec![ - // Entry::new("book", "Hegel 2021"), - // Entry::new("book", "Kant 2020"), - // ]; - // let citations = vec!["Hegel 2021".to_string(), "Kant 2020".to_string()]; - // let matched_citations = match_citations_to_bibliography(citations, &bibliography).unwrap(); - // assert_eq!(matched_citations, bibliography); - // } + #[test] + fn match_three_citations_to_bibliography() { + let bibliography = + BiblatexUtils::retrieve_bibliography_entries("tests/mocks/test.bib").unwrap(); + let citations = vec![ + "Hegel 2010".to_string(), + "Hegel 2018".to_string(), + "Burbidge 1981".to_string(), + ]; + let matched_citations = match_citations_to_bibliography(citations, &bibliography).unwrap(); + assert_eq!(matched_citations.len(), 3); + } + + #[test] + fn error_on_ambiguous_citations() { + let bibliography = + BiblatexUtils::retrieve_bibliography_entries("tests/mocks/test.bib").unwrap(); + let citations = vec!["Hegel 1991".to_string()]; + let result = match_citations_to_bibliography(citations, &bibliography); + + match result { + Err(CitationError::AmbiguousMatch(msg)) => { + assert!(msg.contains("'Hegel 1991' might refer to multiple entries: key: hegel1991logic, key: hegel1991encyclopaedialogic")); + } + Err(e) => panic!("Expected AmbiguousMatch, but got different error: {:?}", e), + Ok(_) => panic!("Expected error, but got Ok"), + } + } } From d03c074f1073a25db30add28129548d3ce90ea2d Mon Sep 17 00:00:00 2001 From: Filip Niklas Date: Wed, 2 Jul 2025 00:16:50 +0200 Subject: [PATCH 24/33] chore: cleanup and todos --- README.md | 2 + src/transformers.rs | 148 +++++++++++++++++++++++--------------------- src/validators.rs | 4 +- 3 files changed, 79 insertions(+), 75 deletions(-) diff --git a/README.md b/README.md index ad2aa21..4baecda 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,8 @@ [![Build 
status](https://github.com/systemphil/prepyrus/workflows/Continuous%20integration/badge.svg)](https://github.com/systemphil/prepyrus/actions) [![Current crates.io release](https://img.shields.io/crates/v/prepyrus)](https://crates.io/crates/prepyrus) +// TODO Update readme with disambiguation + Prepyrus is a tool for verifying and processing MDX files that contain citations in Chicago author-date style and certain metadata. diff --git a/src/transformers.rs b/src/transformers.rs index d13475d..4ffd6ff 100644 --- a/src/transformers.rs +++ b/src/transformers.rs @@ -29,6 +29,7 @@ pub fn entries_to_strings(entries: &Vec) -> Vec String { let mut full_content = article_file_data.full_file_content.clone(); @@ -44,6 +45,78 @@ pub fn transform_keys_to_citations(article_file_data: &ArticleFileData) -> Strin full_content } +/// Transform MatchedCitation vector into MatchedCitationDisambiguated vector +/// This handles all disambiguation logic in one place +pub fn disambiguate_matched_citations( + citations: Vec, +) -> Vec { + // Group citations by author-year for disambiguation analysis + let mut author_year_groups: HashMap> = HashMap::new(); + + for citation in &citations { + let author = citation.entry.author().unwrap(); + let author_last_name = author[0].name.clone(); + + let date = citation.entry.date().unwrap(); + let year = + BiblatexUtils::extract_year_from_date(&date, citation.entry.key.clone()).unwrap(); + + let author_year_key = format!("{}-{}", author_last_name, year); + author_year_groups + .entry(author_year_key) + .or_insert_with(Vec::new) + .push(citation); + } + + // Create disambiguation mapping + let mut citation_to_disambiguated: HashMap = HashMap::new(); + let mut year_to_disambiguated: HashMap = HashMap::new(); + + for (_author_year_key, group_citations) in author_year_groups { + if group_citations.len() > 1 { + // Need disambiguation - sort by entry key for consistent ordering + let mut sorted_citations = group_citations; + sorted_citations.sort_by(|a, b| 
a.entry.key.cmp(&b.entry.key)); + + for (index, citation) in sorted_citations.iter().enumerate() { + let letter = char::from(b'a' + index as u8); + let disambiguated = create_disambiguated_citation(letter, &citation.entry); + citation_to_disambiguated.insert(citation.citation_raw.clone(), disambiguated); + let disambiguated_year = create_disambiguated_year(letter, &citation.entry); + year_to_disambiguated.insert(citation.citation_raw.clone(), disambiguated_year); + } + } else { + // No disambiguation needed - convert to standard format + let citation = group_citations[0]; + let standard = create_standard_citation(&citation.citation_raw, &citation.entry); + citation_to_disambiguated.insert(citation.citation_raw.clone(), standard); + } + } + + // Transform all citations using the disambiguation map + citations + .into_iter() + .map(|matched_citation| { + let disambiguated = citation_to_disambiguated + .get(&matched_citation.citation_raw) + .cloned() + .unwrap_or_else(|| matched_citation.citation_raw.clone()); // Fallback + + let disambiguated_year = year_to_disambiguated + .get(&matched_citation.citation_raw) + .cloned() + .unwrap_or_else(|| extract_date(&matched_citation.entry).to_string()); + + MatchedCitationDisambiguated { + citation_raw: matched_citation.citation_raw, + citation_author_date_disambiguated: disambiguated, + year_disambiguated: disambiguated_year, + entry: matched_citation.entry, + } + }) + .collect() +} + /// Transform a book entry into a string according to the Chicago bibliography style. 
fn transform_book_entry(matched_citation: &MatchedCitationDisambiguated) -> String { let mut book_string = String::new(); @@ -297,78 +370,6 @@ fn extract_pages(entry: &Entry) -> String { pages } -/// Transform MatchedCitation vector into MatchedCitationDisambiguated vector -/// This handles all disambiguation logic in one place -pub fn disambiguate_matched_citations( - citations: Vec, -) -> Vec { - // Group citations by author-year for disambiguation analysis - let mut author_year_groups: HashMap> = HashMap::new(); - - for citation in &citations { - let author = citation.entry.author().unwrap(); - let author_last_name = author[0].name.clone(); - - let date = citation.entry.date().unwrap(); - let year = - BiblatexUtils::extract_year_from_date(&date, citation.entry.key.clone()).unwrap(); - - let author_year_key = format!("{}-{}", author_last_name, year); - author_year_groups - .entry(author_year_key) - .or_insert_with(Vec::new) - .push(citation); - } - - // Create disambiguation mapping - let mut citation_to_disambiguated: HashMap = HashMap::new(); - let mut year_to_disambiguated: HashMap = HashMap::new(); - - for (_author_year_key, group_citations) in author_year_groups { - if group_citations.len() > 1 { - // Need disambiguation - sort by entry key for consistent ordering - let mut sorted_citations = group_citations; - sorted_citations.sort_by(|a, b| a.entry.key.cmp(&b.entry.key)); - - for (index, citation) in sorted_citations.iter().enumerate() { - let letter = char::from(b'a' + index as u8); - let disambiguated = create_disambiguated_citation(letter, &citation.entry); - citation_to_disambiguated.insert(citation.citation_raw.clone(), disambiguated); - let disambiguated_year = create_disambiguated_year(letter, &citation.entry); - year_to_disambiguated.insert(citation.citation_raw.clone(), disambiguated_year); - } - } else { - // No disambiguation needed - convert to standard format - let citation = group_citations[0]; - let standard = 
create_standard_citation(&citation.citation_raw, &citation.entry); - citation_to_disambiguated.insert(citation.citation_raw.clone(), standard); - } - } - - // Transform all citations using the disambiguation map - citations - .into_iter() - .map(|matched_citation| { - let disambiguated = citation_to_disambiguated - .get(&matched_citation.citation_raw) - .cloned() - .unwrap_or_else(|| matched_citation.citation_raw.clone()); // Fallback - - let disambiguated_year = year_to_disambiguated - .get(&matched_citation.citation_raw) - .cloned() - .unwrap_or_else(|| extract_date(&matched_citation.entry).to_string()); - - MatchedCitationDisambiguated { - citation_raw: matched_citation.citation_raw, - citation_author_date_disambiguated: disambiguated, - year_disambiguated: disambiguated_year, - entry: matched_citation.entry, - } - }) - .collect() -} - /// Create disambiguated citation with letter (e.g., "@hegel2020logic, 123" -> "Hegel 2020a") fn create_disambiguated_citation(letter: char, entry: &Entry) -> String { let author = format_authors_last_name_only(entry.author().unwrap()); @@ -376,6 +377,7 @@ fn create_disambiguated_citation(letter: char, entry: &Entry) -> String { format!("{} {}{}", author, year, letter) } +/// Create a disambiguated year (e.g., "2018a") fn create_disambiguated_year(letter: char, entry: &Entry) -> String { let year = extract_date(entry); format!("{}{}", year, letter) @@ -397,3 +399,5 @@ fn create_standard_citation(raw_citation: &str, entry: &Entry) -> String { raw_citation.to_string() } } + +// TODO build test suite for creating disambiguate_matched_citations diff --git a/src/validators.rs b/src/validators.rs index f2e1476..4babebf 100644 --- a/src/validators.rs +++ b/src/validators.rs @@ -100,8 +100,6 @@ pub struct MatchedCitationDisambiguated { pub entry: Entry, } -// TODO build test suite for missing keys when keys are used - impl TryFrom for ArticleFileData { type Error = Box; @@ -359,7 +357,7 @@ fn create_citations_set(citations: Vec) -> Vec { 
/// Matches citations to the inputted bibliography /// the matched list is returned with full bibliographical details. -/// If any citation is not found in the bibliography, an error is returned. +/// Returns error for any unmatched or ambiguous citations. fn match_citations_to_bibliography( citations: Vec, bibliography: &Vec, From 7cf307207abb6f55ff34fcd28cfee7e991913210 Mon Sep 17 00:00:00 2001 From: Filip Niklas Date: Wed, 2 Jul 2025 00:25:52 +0200 Subject: [PATCH 25/33] chore: tweak ci activation --- .github/workflows/ci.yaml | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 2abe5e3..bde990e 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -1,5 +1,9 @@ -name: Continuous integration -on: [push, pull_request] +name: Continuous Integration +on: + workflow_dispatch: + pull_request: + branches: ["main"] + types: [opened, synchronize, ready_for_review] jobs: ci: @@ -8,4 +12,4 @@ jobs: - uses: actions/checkout@v3 - uses: actions-rust-lang/setup-rust-toolchain@v1 - run: cargo build - - run: cargo test \ No newline at end of file + - run: cargo test From 9742193b08d10a113496378f2c4ffb0d88b9505c Mon Sep 17 00:00:00 2001 From: Filip Niklas Date: Wed, 2 Jul 2025 00:27:31 +0200 Subject: [PATCH 26/33] chore: prevent ci from running in draft --- .github/workflows/ci.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index bde990e..28f0903 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -7,6 +7,7 @@ on: jobs: ci: + if: ${{ github.event.pull_request.draft == false }} runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 From 207b058a8f4e611ce5ff4fa4c52673c29a4ee98c Mon Sep 17 00:00:00 2001 From: Filip Niklas Date: Wed, 2 Jul 2025 18:16:54 +0200 Subject: [PATCH 27/33] chore: tests for mixed keys and citations --- src/validators.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) 
diff --git a/src/validators.rs b/src/validators.rs index 4babebf..198b506 100644 --- a/src/validators.rs +++ b/src/validators.rs @@ -643,6 +643,20 @@ mod tests_validate_citations { assert_eq!(matched_citations.len(), 3); } + #[test] + fn match_four_mixed_citations_to_bibliography() { + let bibliography = + BiblatexUtils::retrieve_bibliography_entries("tests/mocks/test.bib").unwrap(); + let citations = vec![ + "Hegel 2010".to_string(), + "Hegel 2018".to_string(), + "@doe2021a".to_string(), + "@doe2021e".to_string(), + ]; + let matched_citations = match_citations_to_bibliography(citations, &bibliography).unwrap(); + assert_eq!(matched_citations.len(), 4); + } + #[test] fn error_on_ambiguous_citations() { let bibliography = From 4bda66923659c203f820ed56578317a9190cb9f8 Mon Sep 17 00:00:00 2001 From: Filip Niklas Date: Wed, 2 Jul 2025 18:20:57 +0200 Subject: [PATCH 28/33] chore: fix mock with key --- tests/mocks/data/first-paragraph.mdx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/mocks/data/first-paragraph.mdx b/tests/mocks/data/first-paragraph.mdx index 6954775..749adc2 100644 --- a/tests/mocks/data/first-paragraph.mdx +++ b/tests/mocks/data/first-paragraph.mdx @@ -21,7 +21,7 @@ conceptual structure that presents itself at the beginning of Mechanism. The first determination is described in the following terms: > The object is, as we have seen, the syllogism, whose mediation has been -> sublated and has therefore become an immediate identity (Hegel 1991, 711). +> sublated and has therefore become an immediate identity (@hegel1991logic, 711). Let's unpack this initial thought, without trying to unpack the reference to the `syllogism`. @@ -40,7 +40,7 @@ immediate identity? Hegel clarifies this in the following sentence: > It is therefore in and for itself a universal - universality not in the sense > of a community of properties, but a universality that pervades the -> particularity and in it is immediate individuality (Hegel 1991, 711). 
+> particularity and in it is immediate individuality (@hegel1991logic, 711). The moments of the `mechanical object` are the determinations of the `Concept` : `universal`, `particular`, and `individual`. It is these moments that are @@ -51,4 +51,4 @@ finds its essence instantiated in particular and individual objects. It is not, for example, like the universal concept of a chair that states that a chair must be "so and so" and that serves as the essence of armchairs and swivel chairs, alike. It is not, as Hegel writes, a universal “in the sense of a community of -properties” (Hegel 1991, 711). +properties” (@hegel1991logic, 711). From f9aa7d4c92c64ec84c01fe66111e1128f8bdc5ee Mon Sep 17 00:00:00 2001 From: Filip Niklas Date: Wed, 2 Jul 2025 18:24:12 +0200 Subject: [PATCH 29/33] chore: comment addition --- src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/lib.rs b/src/lib.rs index 6a3d7fd..52f315d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -40,6 +40,7 @@ fn run() -> Result<(), Box> { generate_index_to_file: None, index_link_prefix_rewrite: None, }; + // Normally one would use let cli = Prepyrus::parse_cli(); let config = Prepyrus::build_config(cli, None)?; let all_entries = Prepyrus::get_all_bib_entries(&config.bib_file).unwrap(); From b869c6786d0a18e8860968362b960f465914b0b1 Mon Sep 17 00:00:00 2001 From: Filip Niklas Date: Wed, 2 Jul 2025 18:32:24 +0200 Subject: [PATCH 30/33] fix: race conditions with tests --- src/lib.rs | 2 +- tests/mocks/data-isolated/first-paragraph.mdx | 54 +++++++++++++++++++ 2 files changed, 55 insertions(+), 1 deletion(-) create mode 100644 tests/mocks/data-isolated/first-paragraph.mdx diff --git a/src/lib.rs b/src/lib.rs index 52f315d..5a9fc53 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -34,7 +34,7 @@ fn run() -> Result<(), Box> { // Example Command Line Inputs let cli = Cli { bib_file: "tests/mocks/test.bib".to_string(), - target_path: "tests/mocks/data".to_string(), + target_path: "tests/mocks/data-isolated".to_string(), 
mode: Mode::Verify, ignore_paths: Some(vec!["tests/mocks/data/development.mdx".into()]), generate_index_to_file: None, diff --git a/tests/mocks/data-isolated/first-paragraph.mdx b/tests/mocks/data-isolated/first-paragraph.mdx new file mode 100644 index 0000000..749adc2 --- /dev/null +++ b/tests/mocks/data-isolated/first-paragraph.mdx @@ -0,0 +1,54 @@ +--- +title: First Paragraph +indexTitle: Mechanical Object First Paragraph (Hegel) +description: + Learn about the first paragraph of the Mechanical Object from Hegel's + Science of Logic +isArticle: true +authors: Ahilleas Rokni (2024) +editors: +contributors: Filip Niklas (2024) +--- + +# MDX Test Sample: Broken Up For the Purposes of Testing Prepyrus + +[Link to actual article](https://github.com/systemphil/sphil/blob/main/src/pages/hegel/reference/mechanical-object/first-paragraph.mdx) + +## The Mechanical Object, First Paragraph + +Before even thinking about the `mechanical object`, let us just think about the +conceptual structure that presents itself at the beginning of Mechanism. The +first determination is described in the following terms: + +> The object is, as we have seen, the syllogism, whose mediation has been +> sublated and has therefore become an immediate identity (@hegel1991logic, 711). + +Let's unpack this initial thought, without trying to unpack the reference to the +`syllogism`. + +First, the mediation of the syllogism, whatever that means in concrete terms, +has been sublated . Strictly speaking, Hegel writes that it has been “balanced +out” or “equilibrated” [*ausgeglichen*]. It is because the mediation of the +`syllogism` has been equilibrated that it was sublated. As such, the mediation +of the `syllogism` is not nullified but has been set aside by a more developed +kind of relation - the relation of the `mechanical object` that is now an +immediate identity [*unmittelbare Identität*]. The moments of the `mechanical object` +are immediately identical to each other, and not mediated. 
+ +What exactly are these moments of the `mechanical object` that have become an +immediate identity? Hegel clarifies this in the following sentence: + +> It is therefore in and for itself a universal - universality not in the sense +> of a community of properties, but a universality that pervades the +> particularity and in it is immediate individuality (@hegel1991logic, 711). + +The moments of the `mechanical object` are the determinations of the `Concept` : +`universal`, `particular`, and `individual`. It is these moments that are +immediately identical to each other. In the `mechanical object`, the `universal` +is immediately the `particular` and the `individual`. In other words, the +`universal` is not a universal that has the basic essence of a thing and that +finds its essence instantiated in particular and individual objects. It is not, +for example, like the universal concept of a chair that states that a chair must +be "so and so" and that serves as the essence of armchairs and swivel chairs, +alike. It is not, as Hegel writes, a universal “in the sense of a community of +properties” (@hegel1991logic, 711). 
From c0a59f15538ee414163c284e83fa1407b07f80f4 Mon Sep 17 00:00:00 2001 From: Filip Niklas Date: Wed, 2 Jul 2025 18:45:27 +0200 Subject: [PATCH 31/33] docs: update docs for 0.4 --- README.md | 58 ++++++++++++++++++++++++++++++++++++------------------ src/lib.rs | 37 ++++++++++++++++++++++++++++++++-- 2 files changed, 74 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 4baecda..e18ce94 100644 --- a/README.md +++ b/README.md @@ -3,8 +3,6 @@ [![Build status](https://github.com/systemphil/prepyrus/workflows/Continuous%20integration/badge.svg)](https://github.com/systemphil/prepyrus/actions) [![Current crates.io release](https://img.shields.io/crates/v/prepyrus)](https://crates.io/crates/prepyrus) -// TODO Update readme with disambiguation - Prepyrus is a tool for verifying and processing MDX files that contain citations in Chicago author-date style and certain metadata. @@ -12,7 +10,7 @@ that contain citations in Chicago author-date style and certain metadata. ## Usage -Run `cargo add prepyrus` or add the crate to your `Cargo.toml`: +Add the crate to your `Cargo.toml` and use it as shown below: ```toml [dependencies] @@ -22,18 +20,13 @@ prepyrus = "" Main API interface is the `Prepyrus` impl. 
Example usage: ```rust -use prepyrus::Prepyrus; +use prepyrus::{ + cli::{Cli, Mode}, + Prepyrus +}; fn main() { - let args = vec![ - "_program_index".to_string(), - "tests/mocks/test.bib".to_string(), // bibliography file - "tests/mocks/data".to_string(), // target directory or .mdx file - "verify".to_string(), // mode - "tests/mocks/data/development.mdx".to_string(), // optional ignore paths, separate with commas if multiple - ]; - - let _ = run(args).unwrap_or_else(|e| { + let _ = run().unwrap_or_else(|e| { eprintln!("Error: {}", e); std::process::exit(1); }); @@ -41,8 +34,19 @@ fn main() { println!("Prepyrus completed successfully!"); } -fn run(args: Vec) -> Result<(), Box> { - let config = Prepyrus::build_config(&args, None)?; +fn run() -> Result<(), Box> { + // Example Command Line Inputs + let cli = Cli { + bib_file: "tests/mocks/test.bib".to_string(), + target_path: "tests/mocks/data-isolated".to_string(), + mode: Mode::Verify, + ignore_paths: Some(vec!["tests/mocks/data/development.mdx".into()]), + generate_index_to_file: None, + index_link_prefix_rewrite: None, + }; + // Normally one would use let cli = Prepyrus::parse_cli(); + + let config = Prepyrus::build_config(cli, None)?; let all_entries = Prepyrus::get_all_bib_entries(&config.bib_file).unwrap(); let mdx_paths = Prepyrus::get_mdx_paths(&config.target_path, Some(config.settings.ignore_paths))?; @@ -51,7 +55,7 @@ fn run(args: Vec) -> Result<(), Box> { let articles_file_data = Prepyrus::verify(mdx_paths, &all_entries)?; // Phase 2: Process MDX files (requires mode to be set to "process") - if config.mode == "process" { + if config.mode == Mode::Process { Prepyrus::process(articles_file_data); } @@ -67,10 +71,14 @@ fn run(args: Vec) -> Result<(), Box> { ## Description -The tool is designed to work with MDX files that contain citations in Chicago author-date style. Examples: +The tool is designed to work with MDX files that contain citations in Chicago author-date style or by BibTex key. 
Examples: > "...nowhere on heaven or on earth is there anything which does not contain both being and nothing in itself" (Hegel 2010, 61). +> "The equilibrium in which coming-to-be and ceasing-to-be are poised is in the first place becoming itself" (@hegel2010logic, 81). + +> "Existence proceeds from becoming" (see Hegel 2010, 61). + The tool parses and verifies the citations in the MDX files against a bibliography file in BibTeX format (using Biblatex). If the citations are valid, the tool processes the MDX files @@ -78,6 +86,8 @@ by adding a bibliography section at the end of the file. It also adds author, editor, and contributor from the MDX file metadata if available. Finally, it also adds a notes heading at the end if footnotes are present in the file. +If BibTex keys are used, these will be replaced by disambiguated citations during `process` mode. + ## Additional Features **Alphabetical Index Generation** @@ -95,11 +105,21 @@ You can rewrite parts of generated index links using: --link-prefix-rewrite "/content=/articles" ``` +**Handling Ambiguities** + +Version `0.4` introduces citation ambiguity handling. When an author has multiple +works in the same year, such as (Hegel 1991) which might refer to the Miller +translation of the Science of Logic or the Encyclopaedia Logic, the program will +return an error with disambiguation suggestions by key. To solve ambiguous citations, +one must make use of BibTex keys prefixed with @ in the citation, e.g. `(@hegel1991logic)`. + +During `process` mode, keys will be converted to disambiguated citations in Chicago author-date style. + ## Limitations The tool currently only supports citations in Chicago author-date style. -Only book and article entries are currently supported (plans to support more types in the future). -Only the following metadata fields from the target `.mdx` files are supported: +Only book entries are currently supported (plans to support more types in the future). 
+Only the following metadata fields are supported: - author - editor diff --git a/src/lib.rs b/src/lib.rs index 5a9fc53..8c2d022 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,7 +10,7 @@ Add the crate to your `Cargo.toml` and use it as shown below: ```toml [dependencies] -prepyrus = "0.2" +prepyrus = "<latest_version>" ``` Main API interface is the `Prepyrus` impl. Example usage: @@ -67,10 +67,14 @@ fn run() -> Result<(), Box<dyn std::error::Error>> { ## Description -The tool is designed to work with MDX files that contain citations in Chicago author-date style. Examples: +The tool is designed to work with MDX files that contain citations in Chicago author-date style or by BibTeX key. Examples: > "...nowhere on heaven or on earth is there anything which does not contain both being and nothing in itself" (Hegel 2010, 61). +> "The equilibrium in which coming-to-be and ceasing-to-be are poised is in the first place becoming itself" (@hegel2010logic, 81). + +> "Existence proceeds from becoming" (see Hegel 2010, 61). + The tool parses and verifies the citations in the MDX files against a bibliography file in BibTeX format (using Biblatex). If the citations are valid, the tool processes the MDX files @@ -78,6 +82,35 @@ by adding a bibliography section at the end of the file. It also adds author, editor, and contributor from the MDX file metadata if available. Finally, it also adds a notes heading at the end if footnotes are present in the file. +If BibTeX keys are used, these will be replaced by disambiguated citations during `process` mode. + +## Additional Features + +**Alphabetical Index Generation** + +When running in process mode with the `--generate-index-file <FILE>` option, Prepyrus now: + +- Extracts all `indexTitles` from .mdx files. +- Sorts them alphabetically by title. +- Groups them under ## headings by first letter (e.g., ## A, ## B, etc). +- Writes a neatly structured index to the specified .mdx file. 
+ +You can rewrite parts of generated index links using: + +``` +--link-prefix-rewrite "/content=/articles" +``` + +**Handling Ambiguities** + +Version `0.4` introduces citation ambiguity handling. When an author has multiple +works in the same year, such as (Hegel 1991) which might refer to the Miller +translation of the Science of Logic or the Encyclopaedia Logic, the program will +return an error with disambiguation suggestions by key. To resolve ambiguous citations, +one must make use of BibTeX keys prefixed with @ in the citation, e.g. `(@hegel1991logic)`. + +During `process` mode, keys will be converted to disambiguated citations in Chicago author-date style. + ## Limitations The tool currently only supports citations in Chicago author-date style. From 7f1c87094c27c80e8bc331ce79697b723c7dcaa3 Mon Sep 17 00:00:00 2001 From: Filip Niklas Date: Wed, 2 Jul 2025 18:45:59 +0200 Subject: [PATCH 32/33] chore: bump minor --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2807d10..2b831d2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -231,7 +231,7 @@ checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" [[package]] name = "prepyrus" -version = "0.3.1" +version = "0.4.0" dependencies = [ "biblatex", "clap", diff --git a/Cargo.toml b/Cargo.toml index 83597a0..77eef5e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,7 +7,7 @@ repository = "https://github.com/systemphil/prepyrus" readme = "README.md" categories = ["database", "parser-implementations", "text-processing"] keywords = ["bibtex", "biblatex", "mdx", "parser", "citation"] -version = "0.3.1" +version = "0.4.0" edition = "2021" [dependencies] From 4dfda369c272bf3fbf235f65262a2f9eafa4331b Mon Sep 17 00:00:00 2001 From: Filip Niklas Date: Wed, 2 Jul 2025 18:47:40 +0200 Subject: [PATCH 33/33] fix: test --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 
8c2d022..a2ec331 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -97,7 +97,7 @@ When running in process mode with the `--generate-index-file ` optio You can rewrite parts of generated index links using: -``` +```txt --link-prefix-rewrite "/content=/articles" ```