Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
4f06f50
Add files via upload
RohanBhattaraiNP Mar 31, 2025
79e8a38
Update editor.py
RohanBhattaraiNP Mar 31, 2025
659dfb1
Merge pull request #1 from caltechlibrary/main
RohanBhattaraiNP May 7, 2025
104740f
Merge branch 'caltechlibrary:main' into main
RohanBhattaraiNP May 7, 2025
7d6ab3f
Merge branch 'caltechlibrary:main' into main
RohanBhattaraiNP May 7, 2025
31bb610
Merge pull request #2 from caltechlibrary/main
RohanBhattaraiNP May 19, 2025
25b94d8
Add updated CITATION.cff from codemeta.json file
RohanBhattaraiNP May 19, 2025
d9318e5
Black_Formatting
RohanBhattaraiNP May 19, 2025
abe8aaa
Update test_matchers.py
RohanBhattaraiNP May 19, 2025
b6af8ae
Update caltechauthors.py
RohanBhattaraiNP May 19, 2025
d1afcdf
Update test_matchers.py
RohanBhattaraiNP May 19, 2025
8f303c4
Merge pull request #3 from caltechlibrary/main
RohanBhattaraiNP Jun 4, 2025
4729676
Add updated CITATION.cff from codemeta.json file
RohanBhattaraiNP Jun 4, 2025
c88bf89
Update caltechauthors.py
RohanBhattaraiNP Jun 4, 2025
52f4c30
Update caltechauthors.py
RohanBhattaraiNP Jun 4, 2025
0025215
Update test_matchers.py
RohanBhattaraiNP Jun 4, 2025
1e3c2f1
Update test_matchers.py
RohanBhattaraiNP Jun 4, 2025
3ad7bd2
Update test.yml
RohanBhattaraiNP Jun 4, 2025
9300b13
Update caltechauthors.py
RohanBhattaraiNP Jun 4, 2025
2f7d245
Update test_matchers.py
RohanBhattaraiNP Jun 4, 2025
d283a14
Update test_matchers.py
RohanBhattaraiNP Jun 4, 2025
3c6d717
Update caltechauthors.py
RohanBhattaraiNP Jun 4, 2025
ec96733
Update codemeta.json
RohanBhattaraiNP Jun 4, 2025
638d0aa
Add updated CITATION.cff from codemeta.json file
RohanBhattaraiNP Jun 4, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# CI workflow: runs the CaltechAuthors matcher unit tests whenever the matcher
# module or anything under tests/ changes, or when triggered manually.
name: Test CaltechAuthors Matcher

on:
  push:
    paths:
      - 'ames/matchers/caltechauthors.py'
      - 'tests/**'
  pull_request:
    paths:
      - 'ames/matchers/caltechauthors.py'
      - 'tests/**'
  workflow_dispatch:

jobs:
  test-caltechauthors:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          # Only a missing requirements file is tolerated; a failing install
          # must fail the job instead of being hidden behind `|| true`.
          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi

      - name: Run tests for caltechauthors
        env:
          # API token read by the code under test from the RDMTOK variable.
          RDMTOK: ${{ secrets.RDMTOK }}
        run: |
          PYTHONPATH=${{ github.workspace }} python -m unittest discover -s tests -p 'test_matchers.py'
5 changes: 4 additions & 1 deletion CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ authors:
- family-names: Doiel
given-names: Robert
orcid: https://orcid.org/0000-0003-0900-6903
- family-names: Bhattarai
given-names: Rohan
orcid: https://orcid.org/0009-0007-0323-4733
- family-names: Won
given-names: Elizabeth
orcid: https://orcid.org/0009-0002-2450-6471
Expand All @@ -21,4 +24,4 @@ keywords:
- GitHub
- metadata
- software
date-released: 2025-05-19
date-released: 2025-06-04
8 changes: 5 additions & 3 deletions add_orcid_script.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
"""Run run_authors_name_update.py once per row of orcids.csv to add ORCIDs."""
import csv
import subprocess

# Column positions in orcids.csv of the values passed to the update script.
ORCID_COLUMN = 8
CLPID_COLUMN = 10

# newline="" is what the csv module expects so quoted embedded newlines are
# parsed correctly.
with open("orcids.csv", "r", newline="") as f:
    orcid_list = list(csv.reader(f))

for orcid_data in orcid_list:
    # Blank or truncated rows have no ORCID/CLPID columns; skip them rather
    # than aborting the whole batch with an IndexError.
    if len(orcid_data) <= max(ORCID_COLUMN, CLPID_COLUMN):
        continue
    orcid = orcid_data[ORCID_COLUMN]
    clpid = orcid_data[CLPID_COLUMN]
    # Pass an argument list (no shell) so values read from the CSV cannot be
    # interpreted as shell syntax. Failures of individual updates are not
    # fatal, matching the previous os.system behaviour.
    subprocess.run(
        [
            "python",
            "run_authors_name_update.py",
            clpid,
            orcid,
            "-add",
            "-new-scheme",
            "orcid",
        ],
        check=False,
    )
1 change: 1 addition & 0 deletions ames/harvesters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,4 @@
from .caltechauthors import is_file_present
from .caltechauthors import get_series_records
from .caltechauthors import generate_data_citation_csv
from .caltechauthors import get_data_availability_links
62 changes: 59 additions & 3 deletions ames/harvesters/caltechauthors.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,9 +205,7 @@ def get_author_records(
query = f'?q=metadata.creators.person_or_org.identifiers.identifier%3A"{author_identifier}"'

if date:
query += (
f"%20AND%20metadata.publication_date%3A%5B{date}%20TO%20%2A%20%5D"
)
query += f"%20AND%20metadata.publication_date%3A%5B{date}%20TO%20%2A%20%5D"

if token:
headers = {
Expand Down Expand Up @@ -482,3 +480,61 @@ def generate_data_citation_csv():
)

print(f"Saved {len(all_citations)} citations to {output_file}")


def get_data_availability_links(token=None, size=25):
    """Collect the https links found in data-availability record descriptions.

    Queries the CaltechAUTHORS records API for records that have an
    additional description of type ``data-availability``. For each returned
    record, every entry under ``metadata.additional_descriptions`` is scanned
    for https links, and each link is classified, cleaned, and checked against
    the record's files using this module's helper functions.

    Args:
        token: Optional API bearer token. When ``None``, the ``RDMTOK``
            environment variable is used if it is set; with no token the
            request is sent unauthenticated.
        size: Number of records to request from the API.

    Returns:
        A list of dicts, one per link, with the keys ``record_id``,
        ``original_link``, ``classification``, ``cleaned_link``,
        ``filename`` and ``file_present``. The list is empty when the query
        matches no records.

    Raises:
        requests.HTTPError: If the API answers with a status other than 200.
    """
    base_url = (
        "https://authors.library.caltech.edu/api/records"
        "?q=metadata.additional_descriptions.type.id%3A%22data-availability%22"
        f"&size={size}&sort=bestmatch"
    )

    # An explicitly supplied token wins; the environment is only a fallback.
    if token is None:
        token = os.environ.get("RDMTOK")

    headers = {}
    if token:
        headers = {
            "Authorization": f"Bearer {token}",
            "Content-type": "application/json",
        }

    response = requests.get(base_url, headers=headers)
    if response.status_code != 200:
        # Raise instead of calling exit(): a library function should not
        # terminate the interpreter, and exit() reported success (status 0).
        raise requests.HTTPError(
            f"Error: Unable to fetch records from the API. Status code: {response.status_code}",
            response=response,
        )

    records = response.json().get("hits", {}).get("hits", [])
    if not records:
        print("No records found.")
        return []

    results = []
    for record in records:
        record_id = record.get("id")
        descriptions = record.get("metadata", {}).get("additional_descriptions", [])

        for description_entry in descriptions:
            description = description_entry.get("description", "")
            for link in extract_https_links(description):
                filename = extract_filename_from_link(link)
                results.append(
                    {
                        "record_id": record_id,
                        "original_link": link,
                        "classification": classify_link(link),
                        "cleaned_link": clean_link(link),
                        "filename": filename,
                        "file_present": is_file_present(record_id, filename),
                    }
                )

    return results
1 change: 1 addition & 0 deletions ames/matchers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,4 @@
from .caltechauthors import save_metadata_to_file
from .caltechauthors import add_related_identifiers_from_csv
from .caltechauthors import add_authors_affiliations
from .caltechauthors import process_link_updates
Loading