From 4f06f50d913430b1786b9b6e186526d453044731 Mon Sep 17 00:00:00 2001 From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com> Date: Mon, 31 Mar 2025 02:55:44 -0700 Subject: [PATCH 01/19] Add files via upload --- ames/harvesters/editor.py | 133 +++++++++++++++++++++++++++++ ames/harvesters/output.csv | 168 +++++++++++++++++++++++++++++++++++++ 2 files changed, 301 insertions(+) create mode 100644 ames/harvesters/editor.py create mode 100644 ames/harvesters/output.csv diff --git a/ames/harvesters/editor.py b/ames/harvesters/editor.py new file mode 100644 index 00000000..6a24a38e --- /dev/null +++ b/ames/harvesters/editor.py @@ -0,0 +1,133 @@ +import csv +import json +import requests +from caltechdata_edit import caltechdata_edit + +# Read the CSV file +records = [] +with open('output.csv', 'r') as csvfile: + reader = csv.DictReader(csvfile) + for row in reader: + records.append(row) + +# Access token for authentication +token = "moaclkv2MHDHoGklIZs7ABF5mmBZNcxKGgmHY4yyHaUAGbCuyO3DNXhehpL7" + +# Using the development system (production=False) +production = False +base_url = "https://data.caltechlibrary.dev" + +# Set up headers for API requests +headers = { + "Authorization": f"Bearer {token}", + "Content-type": "application/json", +} + +def add_related_identifier(record_id, doi, caltech_author_id): + """Add DOI and CaltechAUTHORS_ID to related identifiers directly using the API""" + print(f"Processing Test_ID: {record_id} with DOI: {doi} and CaltechAUTHORS_ID: {caltech_author_id}") + + # First, get the current record + response = requests.get(f"{base_url}/api/records/{record_id}", headers=headers) + + if response.status_code != 200: + print(f"Error fetching record {record_id}: {response.status_code}") + print(response.text) + return False + + record_data = response.json() + + # Check if there's already a draft + draft_response = requests.get(f"{base_url}/api/records/{record_id}/draft", headers=headers) + + if draft_response.status_code == 200: + # Use the draft if it exists + record_data = draft_response.json() + else: + # Create a draft if it doesn't exist + draft_create_response = requests.post( + f"{base_url}/api/records/{record_id}/draft", + headers=headers + ) + + if draft_create_response.status_code != 201: + print(f"Error creating draft for {record_id}: {draft_create_response.status_code}") + print(draft_create_response.text) + return False + + record_data = draft_create_response.json() + + # Update the related identifiers + related_identifiers = record_data.get("metadata", {}).get("related_identifiers", []) + if related_identifiers is None: + related_identifiers = [] + + # Check if DOI already exists + doi_exists = any(identifier.get("identifier") == doi for identifier in related_identifiers) + + # Check if CaltechAUTHORS_ID URL already exists + author_url = f"https://authors.library.caltech.edu/records/{caltech_author_id}" + author_url_exists = any(identifier.get("identifier") == author_url for identifier in related_identifiers) + + # Add the DOI if it doesn't exist + if not doi_exists: + new_doi_identifier = { + "relation_type": {"id": "issupplementedby"}, + "identifier": doi, + "scheme": "doi", + "resource_type": {"id": "publication"} + } + related_identifiers.append(new_doi_identifier) + print(f"Adding DOI related identifier: {doi}") + else: + print(f"DOI {doi} already exists in related identifiers") + + # Add the CaltechAUTHORS_ID URL if it doesn't exist + if not author_url_exists: + new_author_identifier = { + "relation_type": {"id": "isreferencedby"}, + 
"identifier": author_url, + "scheme": "url", + "resource_type": {"id": "publication"} + } + related_identifiers.append(new_author_identifier) + print(f"Adding CaltechAUTHORS_ID related identifier: {author_url}") + else: + print(f"CaltechAUTHORS_ID URL {author_url} already exists in related identifiers") + + record_data["metadata"]["related_identifiers"] = related_identifiers + + # Update the draft + update_response = requests.put( + f"{base_url}/api/records/{record_id}/draft", + headers=headers, + json=record_data + ) + + if update_response.status_code != 200: + print(f"Error updating draft for {record_id}: {update_response.status_code}") + print(update_response.text) + return False + + # Publish the draft + publish_response = requests.post( + f"{base_url}/api/records/{record_id}/draft/actions/publish", + headers=headers + ) + + if publish_response.status_code != 202: + print(f"Error publishing draft for {record_id}: {publish_response.status_code}") + print(publish_response.text) + return False + + print(f"Successfully added related identifier {doi} to {record_id} and published the changes") + return True + +# Process each record +for record in records: + test_id = record['Test_ID'] + doi = record['CaltechAUTHORS_DOI'] + caltech_author_id = record['CaltechAUTHORS_ID'] + add_related_identifier(test_id, doi, caltech_author_id) + +print("Processing complete") \ No newline at end of file diff --git a/ames/harvesters/output.csv b/ames/harvesters/output.csv new file mode 100644 index 00000000..2d5bc431 --- /dev/null +++ b/ames/harvesters/output.csv @@ -0,0 +1,168 @@ +CaltechAUTHORS_ID,CaltechAUTHORS_DOI,Related_DOI,Data_ID,Cross_Link,Test_ID +bwww3-z8y74,10.1093/mnras/staa2808,10.22002/D1.1458,3hqgp-jhw61,No,99s7k-d6f58 +dm3mv-q1b76,10.1038/s41524-019-0216-x,10.22002/D1.1256,yrn11-jb916,No,t899g-xww46 +ahsp6-e8a25,10.1186/s40168-020-0785-4,10.22002/D1.1295,88yzp-h0n85,No,ndyrw-3gq31 +n1n6t-pxs56,10.1038/s41586-021-03601-4,10.22002/D1.1455,m47ts-35f81,No,ydg6m-x6q85 +zjfrp-a9k69,10.1126/science.abb3099,10.22002/D1.1444,s3ejh-8rk72,No,9vgt2-8vy76 +jvzym-1mh07,10.1021/acssynbio.1c00592,10.22002/D1.2140,tvy11-z5a48,No,rff39-mtm48 +r8qh8-y4065,10.1128/JCM.01785-21,10.22002/D1.1942,bv2tf-aap55,No,whd0t-96h94 +nrbae-qk103,10.1098/rspa.2021.0561,10.22002/D1.2173,m1b0w-0zs59,No,6atdb-1bj05 +nrbae-qk103,10.1098/rspa.2021.0561,10.22002/D1.2024,e3sea-97705,No,0x4v2-cb178 +dcas8-y4s76,10.1126/science.abg9765,10.22002/D1.1882,s7epj-5ry14,No,c0e9h-2n643 +k3h8s-vjz03,10.1016/j.atmosenv.2021.118809,10.22002/D1.1985,254mc-zpg74,No,4qwbc-5sb81 +9x8wh-65414,10.7554/eLife.65092,10.22002/D1.2076,k9hkr-v0978,No,v22y2-m7n82 +9x8wh-65414,10.7554/eLife.65092,10.22002/D1.2157,g3sp0-33085,No,9eh2f-k1a76 +9x8wh-65414,10.7554/eLife.65092,10.22002/D1.1790,qxm6c-q5p97,No,qn0ny-t3b46 +15qr1-8d538,10.1029/2019je006190,10.22002/D1.1326,dwc1w-r9a68,No,7gpm7-hb420 +344g6-s0176,10.1007/s12678-021-00694-3,10.22002/D1.1632,1km87-52j70,No,fdx2t-3rx08 +nxrw7-zx123,10.1038/s41561-021-00862-6,10.22002/D1.2150,0a0fg-yer22,No,36zbz-rbn41 +pa1b8-a8596,10.1029/2020gl089917,10.22002/D1.1376,9wpke-7eg08,No,358dc-1ck54 +ezbzn-3qz02,10.1038/s41567-021-01492-w,10.22002/D1.2202,59zzx-xhb23,No,kcgnj-he106 +hcg0j-pk874,10.1029/2020gl087477,10.22002/D1.1348,j5fk1-0g306,No,wkwk4-8ym88 +40n3g-jae61,10.1029/2017GC007260,10.22002/D1.320,vymbv-n8p13,No,65wey-dqt49 +40n3g-jae61,10.1029/2017GC007260,10.22002/D1.321,bwxz5-0kc80,No,yz4a8-bd988 +5624m-z4s58,10.1029/2019gc008862,10.22002/D1.1380,mm5dq-05y22,No,aekvq-hh376 
+8scha-rhv61,10.1029/2018MS001313,10.22002/D1.933,a49j4-qt740,No,w08gs-1t250 +prs7s-hfr18,10.1038/s41524-022-00747-1,10.22002/D1.1983,dekcc-2tb35,No,2g1py-bnf41 +99bn3-pg365,10.7554/eLife.55308,10.22002/d1.1336,wyrrw-ffe20,No,yd65f-wkz21 +99bn3-pg365,10.7554/eLife.55308,10.22002/D1.1331,8ftre-92r60,No,q9xqq-xrk20 +q0pn9-dgp08,10.1029/2021ea001637,10.22002/D1.2028,p6614-mjw68,No,a7rma-wkb68 +q0pn9-dgp08,10.1029/2021ea001637,10.22002/D1.222,k8gnx-7hj46,No,6ffmj-n7192 +jk4kd-j8n07,10.1029/2020gl091699,10.22002/D1.1666,bk8pf-qvx09,No,gv80y-yq992 +zyge8-axx47,10.5194/se-11-2283-2020,10.22002/D1.1447,6jegf-05x08,No,443sq-jh364 +8xx2v-fk653,10.1093/pnasnexus/pgad033,10.22002/D1.20223,0yw13-j0441,No,szsyt-ytf03 +6famx-f0654,10.1038/s41467-020-16224-6,10.22002/D1.1371,60wg2-9qz17,No,v8shp-3a074 +qjzq0-71624,10.1029/2019jb018597,10.22002/D1.1377,nwj4y-hcm74,No,aan3s-zfk64 +nm7th-hgm94,10.1073/pnas.2102267118,10.22002/D1.1996,10svm-aq733,No,47mgd-ydg26 +zfjpk-xtj13,10.1063/5.0006684,10.22002/D1.1379,dxrpq-xyx02,No,jmzz8-6td18 +kp5yc-avm39,10.1038/s41586-019-1447-1,10.22002/D1.1160,w8hpj-0y065,No,6a7xt-crp87 +c9geb-11h23,10.1073/pnas.1907869116,10.22002/D1.1241,fbv6r-hg153,No,cqnnk-chw84 +zqrkn-f3w82,10.1038/s41524-019-0213-0,10.22002/D1.1178,ehp06-pcf04,No,9r1k1-95d28 +zqrkn-f3w82,10.1038/s41524-019-0213-0,10.22002/D1.1179,p2bsg-7tb62,No,pehkc-etn63 +arj9r-nfc42,10.1038/s41467-019-13262-7,10.22002/D1.1296,2vdrb-bmr68,No,26ts2-f1j05 +kz506-yqx70,10.1029/2019JB018922,10.22002/d1.1328,yen0b-fed04,No,vw4ra-qrp13 +gjzvx-agj04,10.1038/s41598-020-58586-3,10.22002/D1.1298,8d1ar-e8e29,No,gepec-02x71 +ztte2-h5j24,10.1088/1367-2630/ac1144,10.22002/D1.1451,hvtvx-rjq36,No,nz1k8-4ck35 +ngtts-mjd88,10.1029/2021jb021976,10.22002/D1.2009,7tp28-jp627,No,bzqfj-f7077 +ngtts-mjd88,10.1029/2021jb021976,10.22002/D1.2010,e80sv-5py92,No,ffmm8-ckg43 +jemmb-kp969,,10.22002/D1.1921,d2qn0-tse31,No,e3rhe-kdq51 +ke1kq-rk838,10.1038/s41467-022-33449-9,10.22002/D1.20291,9bkyr-21532,No,8b5nn-hbt51 +4jqdv-nrv37,10.1016/j.cels.2022.03.002,10.22002/D1.1693,4ry4k-5gf89,No,h2e81-mz738 +rsg2f-2nj92,10.1126/sciadv.adg6618,10.22002/D1.2090,c2w53-e7q49,No,15m2r-ep710 +qgw1r-cg440,10.1111/sed.13100,10.22002/D1.1645,8zehn-8rr62,No,mzph0-txc86 +w2ev0-snj25,10.1029/2021jf006392,10.22002/D1.8962,yrkvm-9bh56,No,d3ek1-z9x67 +750xa-xag63,,10.22002/D1.1286,zcndp-vg341,No,27jch-y5k25 +j80sb-te308,10.1088/1748-3190/ad277f,10.22002/c5cyj-mev09,c5cyj-mev09,No,86x6j-n4q55 +mypge-8d791,10.1038/s41586-023-06227-w,10.22002/7h65h-89163,7h65h-89163,No,4m1z1-bhc25 +90vmt-dcf44,10.1029/2019je006289,10.22002/D1.1349,kmde9-m7g44,No,y28dp-mfh71 +fx2at-3ps68,10.1029/2019jg005533,10.22002/D1.1226,xc9rx-8qs95,No,rbw1s-e1198 +pjjee-1w296,10.1029/2021MS002671,10.22002/D1.1429,3p6y1-a8b95,No,ymygq-mpz35 +79r10-1a091,10.1029/2021jf006406,10.22002/D1.2078,5qqjp-5g813,No,bkqq3-kfa29 +79r10-1a091,10.1029/2021jf006406,10.22002/D1.9200,dgxbk-45k21,No,9ynk4-ajv10 +4a7y9-nzb92,10.1016/j.cels.2022.03.001,10.22002/D1.1692,5e4cj-34824,No,bw0gp-8tt54 +4a7y9-nzb92,10.1016/j.cels.2022.03.001,10.22002/D1.20047,aymp3-qzt70,No,15qm4-77249 +6k82a-tnj57,10.24349/pjye-gkeo,10.22002/D1.1877,01pg4-5r437,No,h8ps1-0yp52 +1qwbd-2q359,10.1029/2021av000534,10.22002/D1.2176,kasms-vp209,No,e1m0y-cpf40 +dv27e-2sy47,10.1073/pnas.2023433118,10.22002/D1.1667,8rsdg-rxz52,No,kaayd-19r23 +tr0p8-5we23,10.1016/j.cell.2021.11.014,10.22002/D1.1915,653dj-3b761,No,mtzc2-18p79 +tr0p8-5we23,10.1016/j.cell.2021.11.014,10.22002/D1.1900,82zcr-mxa32,No,n1vb7-m1g75 
+tr0p8-5we23,10.1016/j.cell.2021.11.014,10.22002/D1.1905,cj7ec-8g780,No,j8p30-za980 +tr0p8-5we23,10.1016/j.cell.2021.11.014,10.22002/D1.1914,5vd8b-sz113,No,8rp1w-qfz76 +tr0p8-5we23,10.1016/j.cell.2021.11.014,10.22002/D1.1919,zh7s1-8g617,No,pqak6-n5333 +tr0p8-5we23,10.1016/j.cell.2021.11.014,10.22002/D1.1917,2t5tj-e5w45,No,w80kf-hyt30 +tr0p8-5we23,10.1016/j.cell.2021.11.014,10.22002/D1.1916,0vgd3-9cp11,No,9w2er-n4713 +tr0p8-5we23,10.1016/j.cell.2021.11.014,10.22002/D1.1918,5x3vq-z1b96,No,zb2dj-d0f86 +h0e90-htt06,10.1029/2021jc018375,10.22002/D1.1296,2vdrb-bmr68,No,26ts2-f1j05 +0f32n-bgm37,10.1029/2021gl095227,10.22002/D1.1347,8hm1f-w5492,No,9tcpx-vf039 +0f32n-bgm37,10.1029/2021gl095227,10.22002/D1.2033,jn6d1-wfj05,No,t8h2f-87s64 +nde62-t1r58,10.1029/2021JB022462,10.22002/D1.1970,h6438-cgy98,No,pbgwc-7b016 +rwr63-xyt69,10.1029/2021gl096503,10.22002/D1.2135,pyr6e-wt732,No,2kvfz-erc91 +r1pva-c8x21,10.1029/2022jb024329,10.22002/D1.20035,wxsas-96g08,No,8tj1d-22780 +rb73g-vqz89,10.1016/j.xcrp.2022.100959,10.22002/D1.20057,bfap4-h2m21,No,px5ss-5y916 +wr8qq-s8t58,10.1109/WHISPERS.2016.8071774,10.22002/D1.222,k8gnx-7hj46,No,6ffmj-n7192 +ynbgx-0tx98,10.1029/2021ms002747,10.22002/D1.1231,meh5c-wy279,No,61shk-rcs26 +6gcmj-reb48,10.1038/s41586-022-04749-3,10.22002/D1.2155,y72mq-emt30,No,5jjz9-40b67 +vpnng-szs82,10.1126/sciadv.abn9545,10.22002/D1.20048,hend5-jzt61,No,sb743-qp239 +1q6gn-mvc46,10.1029/2022ms003105,10.22002/D1.20052,j8mw7-fm491,No,5mdtq-15724 +ck6pf-68621,10.1093/gji/ggab407,10.22002/D1.1955,31emd-wmv98,No,33y3z-2te02 +d20js-7z640,10.1029/2022gl101715,10.22002/vn6v0-pfr77,vn6v0-pfr77,No,dcy6h-wem15 +rsekp-4g847,10.1109/tgrs.2023.3305194,10.22002/D1.1347,8hm1f-w5492,No,9tcpx-vf039 +cmvm1-e9379,10.1029/2023gl105205,10.22002/D1.1347,8hm1f-w5492,No,9tcpx-vf039 +wkjh4-6nf06,10.1101/2022.03.02.22271724,10.22002/D1.20049,pmm08-6q581,No,rtc14-wmr12 +p6mn1-gs660,10.1101/2022.06.17.496478,10.22002/D1.20199,5vng7-8ne78,No,afznk-vbp04 +vtef0-x7037,10.1101/2022.07.13.22277113,10.22002/D1.20223,0yw13-j0441,No,szsyt-ytf03 +q40ve-64h03,10.1002/essoar.10510937.2,10.22002/D1.20052,j8mw7-fm491,No,5mdtq-15724 +4w29e-xpa51,10.1002/essoar.10510458.2,10.22002/D1.1891,n1yye-2z213,No,zeyw2-27p09 +e9yy0-v0658,10.1002/essoar.10511838.1,10.22002/D1.20038,e1acg-e0k08,No,3pg72-ddh27 +x0cjg-kxg25,10.1101/2022.07.13.22277513,10.22002/D1.20223,0yw13-j0441,No,szsyt-ytf03 +jh5g2-fyg97,10.1002/essoar.10512148.1,10.22002/D1.20252,dhfgs-p7319,No,wjhsn-kbv79 +fgbgn-2rk49,10.1002/essoar.10512118.2,10.22002/D1.20248,pybpv-w7661,No,wp4x8-1vc62 +91hvf-49c47,,10.22002/D1.1438,3ejxg-69q72,No,2ths5-hkw28 +yvded-18923,10.1101/2020.07.25.210468,10.22002/D1.1407,fc4k3-75q88,No,c4fh9-f1170 +166f3-tj121,,10.22002/D1.1100,dkdt4-e0x94,No,bcn0f-n7z19 +nb3f9-s8p96,10.1002/essoar.10504190.1,10.22002/D1.1617,4peqr-t0723,No,npzd5-bzb58 +5xkg8-an610,10.1101/762773,10.22002/d1.1276,gye0e-gw976,No,gqpst-s3d05 +24wrg-rx971,10.1101/2021.05.13.444042,10.22002/D1.1915,653dj-3b761,No,mtzc2-18p79 +24wrg-rx971,10.1101/2021.05.13.444042,10.22002/D1.1900,82zcr-mxa32,No,n1vb7-m1g75 +24wrg-rx971,10.1101/2021.05.13.444042,10.22002/D1.1905,cj7ec-8g780,No,j8p30-za980 +24wrg-rx971,10.1101/2021.05.13.444042,10.22002/D1.1914,5vd8b-sz113,No,8rp1w-qfz76 +24wrg-rx971,10.1101/2021.05.13.444042,10.22002/D1.1917,2t5tj-e5w45,No,w80kf-hyt30 +24wrg-rx971,10.1101/2021.05.13.444042,10.22002/D1.1916,0vgd3-9cp11,No,9w2er-n4713 +24wrg-rx971,10.1101/2021.05.13.444042,10.22002/D1.1918,5x3vq-z1b96,No,zb2dj-d0f86 
+2nvtm-7qe34,10.1029/2019je006191,10.22002/d1.1304,dmrkv-3xn63,No,91tsz-cfc02 +m07dw-3jq86,10.1038/s41587-019-0372-z,10.22002/D1.1311,a73n8-3pa89,No,dth8s-nba11 +n3qyx-kf341,10.1029/2019je006156,10.22002/D1.1299,df548-mhy96,No,a15h1-f1j49 +ks7h3-tf644,10.1038/s42005-021-00703-3,10.22002/D1.1858,vp3y4-ef326,No,2f5p2-2e148 +8mp48-93523,,10.22002/D1.1315,d8w02-xng39,No,q0346-mn915 +a7vwv-mhw66,10.1101/2022.07.10.499405,10.22002/D1.20215,awtz3-tz122,No,xfqva-b4991 +h26g5-k0311,10.1029/2018ea000416,10.22002/D1.1211,5ptpj-x8c08,No,7f5x5-pa209 +h26g5-k0311,10.1029/2018ea000416,10.22002/D1.1212,3j8a8-bzn05,No,xk40w-p0g08 +y47b9-pmn34,10.48550/arXiv.1905.06360,10.22002/D1.1241,fbv6r-hg153,No,cqnnk-chw84 +zf2v5-vx810,10.1029/2018JE005706,10.22002/D1.1087,hhzzq-yw058,No,e67v1-3kg31 +zf2v5-vx810,10.1029/2018JE005706,10.22002/D1.1085,44km0-er448,No,g867t-x4d04 +zf2v5-vx810,10.1029/2018JE005706,10.22002/D1.1086,c8fwc-kvg38,No,c1gj1-hnb74 +k6jp7-8js32,10.1029/2019gl086424,10.22002/d1.1317,rdv1n-st737,No,8kgzq-0z165 +se7bz-k9a90,10.1029/2019jb018855,10.22002/D1.1293,mtn3h-frk09,No,hrpxj-0tr80 +frxqw-qyw74,10.1063/1.5054927,10.22002/D1.305,0t2qn-hvq19,No,f3cwe-hgm53 +dzgj9-y8y49,10.3390/rs12213586,10.22002/d1.1182,45st6-jvh02,No,xk1ex-axk10 +ge4t4-7e026,10.1029/2019je006298,10.22002/D1.1318,gyvwk-btq07,No,te8rh-sxq88 +ge4t4-7e026,10.1029/2019je006298,10.22002/D1.1211,5ptpj-x8c08,No,7f5x5-pa209 +ze84b-v1782,10.1101/2020.12.09.20239467,10.22002/D1.1702,24265-gtd53,No,y98md-bp961 +d4h00-xf206,10.1029/2022jb025425,10.22002/D1.20252,dhfgs-p7319,No,wjhsn-kbv79 +x179p-jhv14,10.7554/elife.85370,10.22002/D1.2157,g3sp0-33085,No,9eh2f-k1a76 +ywtpy-nka66,10.1038/s41929-021-00618-w,10.22002/D1.1632,1km87-52j70,No,fdx2t-3rx08 +zwfrv-rbn84,10.1038/s41467-021-25443-4,10.22002/D1.2032,jvjkh-d9g50,No,x6gw8-as345 +y99cf-fkr11,10.1016/j.icarus.2022.115079,10.22002/D1.20170,3ra81-96y32,No,fddq3-zmn81 +ernz6-2xp43,10.1101/2022.03.21.484932,10.22002/D1.20060,hwv5v-m9x76,No,vfz20-ydp76 +rxd0x-ag678,10.1029/2020JE006675,10.22002/D1.1617,4peqr-t0723,No,npzd5-bzb58 +t2ser-vfe73,10.1029/2020ja027796,10.22002/D1.1333,r35r5-sb884,No,ngxcf-0a955 +v84d4-vcs34,10.1038/s41586-020-2872-x,10.22002/D1.1647,jhn25-fsd29,No,24b5z-t4m60 +c03z0-nfg11,10.1038/s41598-020-77073-3,10.22002/D1.1407,fc4k3-75q88,No,c4fh9-f1170 +4fdtn-y2e21,10.1029/2020je006606,10.22002/D1.1628,d5jt1-wqt82,No,wx24e-5yd65 +ct2sc-f7m12,10.1029/2019av000140,10.22002/D1.1347,8hm1f-w5492,No,9tcpx-vf039 +rdqe2-hsq97,10.1029/2020jb021369,10.22002/D1.1670,e200b-xsm06,No,4qv0g-7yt28 +363j8-nw138,10.1038/s41587-021-00870-2,10.22002/D1.1876,7704n-f6m57,No,3gz1s-d5261 +r7rsd-a7a17,,10.22002/D1.2026,51sah-d9r47,No,mn3a1-x3x94 +r7rsd-a7a17,,10.22002/D1.2025,p8ppf-7ff93,No,e1acb-n8a94 +6f3gb-97h58,10.1126/science.abg2947,10.22002/D1.1976,zpzee-79351,No,px1w1-xqr37 +wfg20-4tn76,10.1029/2020tc006210,10.22002/D1.1388,8dhjv-rvf91,No,zwjd2-rw873 +n2b9p-8jf71,10.1029/2021JB021886,10.22002/D1.1612,ryqnw-bdf94,No,mzwax-jw720 +30s9e-9z096,10.1029/2021jb022676,10.22002/D1.2009,7tp28-jp627,No,bzqfj-f7077 +30s9e-9z096,10.1029/2021jb022676,10.22002/D1.2141,dwkqy-hkj69,No,e0zwb-f9s86 +30s9e-9z096,10.1029/2021jb022676,10.22002/D1.2142,665w3-pbj51,No,dygfc-38b42 +fx63v-hsd80,10.1029/2021gl092598,10.22002/D1.2023,m1r58-kvb98,No,ft45b-8nw98 +vfg98-9zt14,10.5194/gmd-14-6309-2021,10.22002/D1.971,8mkmg-c2938,No,3ydw5-6yx19 +b0xvf-xn162,10.1038/s41561-021-00706-3,10.22002/D1.1874,3hrqe-5x450,No,7m4ep-06188 +b0xvf-xn162,10.1038/s41561-021-00706-3,10.22002/D1.1873,8ydek-yt879,No,m86ym-m3603 
+08dr4-w6943,10.1029/2021je006828,10.22002/D1.1971,gv4qf-qwa77,No,mm42q-wft65 +yzjs7-1cv55,10.1016/j.jsb.2022.107860,10.22002/D1.2096,2vtv3-pp862,No,tyest-mem06 +yzjs7-1cv55,10.1016/j.jsb.2022.107860,10.22002/D1.2099,tz8jq-0mk77,No,yypym-x5693 +yzjs7-1cv55,10.1016/j.jsb.2022.107860,10.22002/D1.2103,kfkqj-q6557,No,4khh3-dpp37 +0mmmp-9pz59,10.1029/2023jb026488,10.22002/D1.20248,pybpv-w7661,No,wp4x8-1vc62 +phbx7-m8a69,10.1088/2515-7655/ac817e,10.22002/D1.20061,q9zpw-g8s64,No,skmrx-14a11 +r5hzq-dzy83,10.2110/jsr.2022.032,10.22002/D1.20044,hzwqz-5wr07,No,0y3zw-g1n05 +sar53-81n52,10.1016/j.epsl.2023.118277,10.22002/D1.1619,s6ey4-qpm11,No,ac32t-mff72 +sar53-81n52,10.1016/j.epsl.2023.118277,10.22002/D1.1620,2znta-5t680,No,tka8c-bjp40 +y5gpx-saw63,10.5194/amt-17-5861-2024,10.14291/TCCON.GGG2014,rhrv4-mcp55,No,jn2dg-2h888 +knwz1-dvb78,,10.14291/tccon.ggg2014.pasadena01.r1/1182415,tb378-y1a55,No,8z4qd-mhz12 +dm3mv-q1b76,10.1038/s41524-019-0216-x,10.22002/D1.1256,yrn11-jb916,No,t899g-xww46 From 79e8a38983fad9f35f392f6479e04182315350b7 Mon Sep 17 00:00:00 2001 From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com> Date: Mon, 31 Mar 2025 14:07:36 -0700 Subject: [PATCH 02/19] Update editor.py File Update caltechauthors.py Update caltechauthors.py Update caltechauthors.py Update caltechauthors.py Update caltechauthors.py Delete ames/harvesters/editor.py Update caltechauthors.py Delete ames/harvesters/updated_file.csv Delete ames/harvesters/output.csv Update __init__.py Update __init__.py Update __init__.py Create test.py Update test.py Create test.yml Update test.yml Create test_matchers.py Update caltechauthors.py Update caltechauthors.py Update test_matchers.py Update test_matchers.py Delete ames/matchers/test.py Update caltechauthors.py Create run_caltechauthors_harvestors.py Create run_caltechauthors_matchers.py Rename run_caltechauthors_harvestors.py to run_harvest_links.py Update caltechauthors.py Update __init__.py Create run_caltechauthors_update_links.py Create run_caltechauthors_get_links.py Delete run_caltechauthors_matchers.py Update caltechauthors.py Update __init__.py Update caltechauthors.py Update caltechauthors.py Update caltechauthors.py Update caltechauthors.py Update caltechauthors.py --- .github/workflows/test.yml | 34 +++ ames/harvesters/__init__.py | 1 + ames/harvesters/caltechauthors.py | 128 +++++++----- ames/harvesters/editor.py | 133 ------------ ames/harvesters/output.csv | 168 --------------- ames/matchers/__init__.py | 1 + ames/matchers/caltechauthors.py | 321 +++++++++++++---------------- run_caltechauthors_get_links.py | 16 ++ run_caltechauthors_update_links.py | 16 ++ run_harvest_links.py | 88 ++++++++ tests/test_matchers.py | 76 +++++++ 11 files changed, 446 insertions(+), 536 deletions(-) create mode 100644 .github/workflows/test.yml delete mode 100644 ames/harvesters/editor.py delete mode 100644 ames/harvesters/output.csv create mode 100644 run_caltechauthors_get_links.py create mode 100644 run_caltechauthors_update_links.py create mode 100644 run_harvest_links.py create mode 100644 tests/test_matchers.py diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 00000000..867667a5 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,34 @@ +name: Test CaltechAuthors Matcher + +on: + push: + paths: + - 'ames/matchers/caltechauthors.py' + - 'tests/**' + pull_request: + paths: + - 'ames/matchers/caltechauthors.py' + - 'tests/**' + workflow_dispatch: + +jobs: + test-caltechauthors: + runs-on: ubuntu-latest + + 
steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.10' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt || true + + - name: Run tests for caltechauthors + run: | + PYTHONPATH=${{ github.workspace }} python -m unittest discover -s tests -p 'test_matchers.py' diff --git a/ames/harvesters/__init__.py b/ames/harvesters/__init__.py index 1add4aea..5e290431 100644 --- a/ames/harvesters/__init__.py +++ b/ames/harvesters/__init__.py @@ -24,3 +24,4 @@ from .caltechauthors import is_file_present from .caltechauthors import get_series_records from .caltechauthors import generate_data_citation_csv +from .caltechauthors import get_data_availability_links diff --git a/ames/harvesters/caltechauthors.py b/ames/harvesters/caltechauthors.py index 93717bb2..4484fafa 100644 --- a/ames/harvesters/caltechauthors.py +++ b/ames/harvesters/caltechauthors.py @@ -334,7 +334,6 @@ def get_records_from_date(date="2023-08-25", test=False): return hits - def doi2url(doi): if not doi.startswith("10."): return doi @@ -352,7 +351,6 @@ def doi2url(doi): return resolved_url return doi - def fetch_metadata(record_id): url = f"https://authors.library.caltech.edu/api/records/{record_id}" try: @@ -362,12 +360,11 @@ def fetch_metadata(record_id): except: return None - def search_resource_type(obj): if isinstance(obj, dict): for k, v in obj.items(): - if k == "resource_type" and isinstance(v, dict) and "id" in v: - return v["id"] + if k == 'resource_type' and isinstance(v, dict) and 'id' in v: + return v['id'] result = search_resource_type(v) if result: return result @@ -378,10 +375,8 @@ def search_resource_type(obj): return result return None - def fetch_resource_type(data): - return search_resource_type(data) or "N/A" - + return search_resource_type(data) or 'N/A' def search_records(prefix): base_url = "https://authors.library.caltech.edu/api/records" @@ -392,7 +387,6 @@ def search_records(prefix): return response.json() return None - def extract_data_citations(hits): citations = [] for hit in hits: @@ -401,50 +395,38 @@ def extract_data_citations(hits): if not metadata: continue - caltechauthors_doi = ( - metadata.get("pids", {}).get("doi", {}).get("identifier", "") - ) + caltechauthors_doi = metadata.get("pids", {}).get("doi", {}).get("identifier", "") resource_type = fetch_resource_type(metadata) related_dois = [] for identifier in metadata.get("metadata", {}).get("related_identifiers", []): if identifier.get("scheme") == "doi": doi = identifier["identifier"] - if any( - doi.startswith(prefix) - for prefix in ["10.22002/", "10.14291/", "10.25989/"] - ): + if any(doi.startswith(prefix) for prefix in ["10.22002/", "10.14291/", "10.25989/"]): related_dois.append(doi) for doi in related_dois: caltechdata_url = doi2url(doi) if "data.caltech.edu/records/" in caltechdata_url: caltechdata_id = caltechdata_url.split("/records/")[-1] - caltechdata_metadata = requests.get( - f"https://data.caltech.edu/api/records/{caltechdata_id}" - ).json() + caltechdata_metadata = requests.get(f"https://data.caltech.edu/api/records/{caltechdata_id}").json() cross_link = "No" - for identifier in caltechdata_metadata.get("metadata", {}).get( - "related_identifiers", [] - ): + for identifier in caltechdata_metadata.get("metadata", {}).get("related_identifiers", []): if identifier.get("identifier") == caltechauthors_doi: cross_link = "Yes" break - citations.append( - { - "CaltechAUTHORS_ID": 
record_id, - "CaltechAUTHORS_DOI": caltechauthors_doi, - "Related_DOI": doi, - "CaltechDATA_ID": caltechdata_id, - "Cross_Link": cross_link, - "resource_type": resource_type, - } - ) + citations.append({ + "CaltechAUTHORS_ID": record_id, + "CaltechAUTHORS_DOI": caltechauthors_doi, + "Related_DOI": doi, + "CaltechDATA_ID": caltechdata_id, + "Cross_Link": cross_link, + "resource_type": resource_type + }) return citations - def generate_data_citation_csv(): prefixes = ["10.22002", "10.14291", "10.25989"] all_citations = [] @@ -457,26 +439,66 @@ def generate_data_citation_csv(): output_file = "data_citations_with_type.csv" with open(output_file, "w", newline="") as f: writer = csv.writer(f) - writer.writerow( - [ - "CaltechAUTHORS_ID", - "CaltechAUTHORS_DOI", - "Related_DOI", - "CaltechDATA_ID", - "Cross_Link", - "resource_type", - ] - ) + writer.writerow(["CaltechAUTHORS_ID", "CaltechAUTHORS_DOI", "Related_DOI", "CaltechDATA_ID", "Cross_Link", "resource_type"]) for citation in all_citations: - writer.writerow( - [ - citation["CaltechAUTHORS_ID"], - citation["CaltechAUTHORS_DOI"], - citation["Related_DOI"], - citation["CaltechDATA_ID"], - citation["Cross_Link"], - citation["resource_type"], - ] - ) + writer.writerow([ + citation["CaltechAUTHORS_ID"], + citation["CaltechAUTHORS_DOI"], + citation["Related_DOI"], + citation["CaltechDATA_ID"], + citation["Cross_Link"], + citation["resource_type"] + ]) print(f"Saved {len(all_citations)} citations to {output_file}") + +def get_data_availability_links(token=None, size=25): + base_url = "https://authors.library.caltech.edu/api/records?q=metadata.additional_descriptions.type.id%3A%22data-availability%22&size=25&sort=bestmatch" + base_file_url_template = "https://authors.library.caltech.edu/api/records/{record_id}/files" + + token = os.environ.get("RDMTOK") + + output_file = "test_results_harvesters.csv" + + headers = {} + if token: + headers = { + "Authorization": f"Bearer {token}", + "Content-type": "application/json", + } + + response = requests.get(base_url, headers=headers) + if response.status_code != 200: + print(f"Error: Unable to fetch records from the API. 
Status code: {response.status_code}") + exit() + + records = response.json().get("hits", {}).get("hits", []) + + if not records: + print("No records found.") + exit() + + results = [] + for record in records: + record_id = record.get("id") + links = record.get("metadata", {}).get("additional_descriptions", []) + + for link_data in links: + description = link_data.get("description", "") + links_in_description = extract_https_links(description) + for link in links_in_description: + classification = classify_link(link) + cleaned = clean_link(link) + filename = extract_filename_from_link(link) + file_present = is_file_present(record_id, filename) + + results.append({ + "record_id": record_id, + "original_link": link, + "classification": classification, + "cleaned_link": cleaned, + "filename": filename, + "file_present": file_present + }) + + return results diff --git a/ames/harvesters/editor.py b/ames/harvesters/editor.py deleted file mode 100644 index 6a24a38e..00000000 --- a/ames/harvesters/editor.py +++ /dev/null @@ -1,133 +0,0 @@ -import csv -import json -import requests -from caltechdata_edit import caltechdata_edit - -# Read the CSV file -records = [] -with open('output.csv', 'r') as csvfile: - reader = csv.DictReader(csvfile) - for row in reader: - records.append(row) - -# Access token for authentication -token = "moaclkv2MHDHoGklIZs7ABF5mmBZNcxKGgmHY4yyHaUAGbCuyO3DNXhehpL7" - -# Using the development system (production=False) -production = False -base_url = "https://data.caltechlibrary.dev" - -# Set up headers for API requests -headers = { - "Authorization": f"Bearer {token}", - "Content-type": "application/json", -} - -def add_related_identifier(record_id, doi, caltech_author_id): - """Add DOI and CaltechAUTHORS_ID to related identifiers directly using the API""" - print(f"Processing Test_ID: {record_id} with DOI: {doi} and CaltechAUTHORS_ID: {caltech_author_id}") - - # First, get the current record - response = requests.get(f"{base_url}/api/records/{record_id}", headers=headers) - - if response.status_code != 200: - print(f"Error fetching record {record_id}: {response.status_code}") - print(response.text) - return False - - record_data = response.json() - - # Check if there's already a draft - draft_response = requests.get(f"{base_url}/api/records/{record_id}/draft", headers=headers) - - if draft_response.status_code == 200: - # Use the draft if it exists - record_data = draft_response.json() - else: - # Create a draft if it doesn't exist - draft_create_response = requests.post( - f"{base_url}/api/records/{record_id}/draft", - headers=headers - ) - - if draft_create_response.status_code != 201: - print(f"Error creating draft for {record_id}: {draft_create_response.status_code}") - print(draft_create_response.text) - return False - - record_data = draft_create_response.json() - - # Update the related identifiers - related_identifiers = record_data.get("metadata", {}).get("related_identifiers", []) - if related_identifiers is None: - related_identifiers = [] - - # Check if DOI already exists - doi_exists = any(identifier.get("identifier") == doi for identifier in related_identifiers) - - # Check if CaltechAUTHORS_ID URL already exists - author_url = f"https://authors.library.caltech.edu/records/{caltech_author_id}" - author_url_exists = any(identifier.get("identifier") == author_url for identifier in related_identifiers) - - # Add the DOI if it doesn't exist - if not doi_exists: - new_doi_identifier = { - "relation_type": {"id": "issupplementedby"}, - "identifier": doi, - "scheme": 
"doi", - "resource_type": {"id": "publication"} - } - related_identifiers.append(new_doi_identifier) - print(f"Adding DOI related identifier: {doi}") - else: - print(f"DOI {doi} already exists in related identifiers") - - # Add the CaltechAUTHORS_ID URL if it doesn't exist - if not author_url_exists: - new_author_identifier = { - "relation_type": {"id": "isreferencedby"}, - "identifier": author_url, - "scheme": "url", - "resource_type": {"id": "publication"} - } - related_identifiers.append(new_author_identifier) - print(f"Adding CaltechAUTHORS_ID related identifier: {author_url}") - else: - print(f"CaltechAUTHORS_ID URL {author_url} already exists in related identifiers") - - record_data["metadata"]["related_identifiers"] = related_identifiers - - # Update the draft - update_response = requests.put( - f"{base_url}/api/records/{record_id}/draft", - headers=headers, - json=record_data - ) - - if update_response.status_code != 200: - print(f"Error updating draft for {record_id}: {update_response.status_code}") - print(update_response.text) - return False - - # Publish the draft - publish_response = requests.post( - f"{base_url}/api/records/{record_id}/draft/actions/publish", - headers=headers - ) - - if publish_response.status_code != 202: - print(f"Error publishing draft for {record_id}: {publish_response.status_code}") - print(publish_response.text) - return False - - print(f"Successfully added related identifier {doi} to {record_id} and published the changes") - return True - -# Process each record -for record in records: - test_id = record['Test_ID'] - doi = record['CaltechAUTHORS_DOI'] - caltech_author_id = record['CaltechAUTHORS_ID'] - add_related_identifier(test_id, doi, caltech_author_id) - -print("Processing complete") \ No newline at end of file diff --git a/ames/harvesters/output.csv b/ames/harvesters/output.csv deleted file mode 100644 index 2d5bc431..00000000 --- a/ames/harvesters/output.csv +++ /dev/null @@ -1,168 +0,0 @@ -CaltechAUTHORS_ID,CaltechAUTHORS_DOI,Related_DOI,Data_ID,Cross_Link,Test_ID -bwww3-z8y74,10.1093/mnras/staa2808,10.22002/D1.1458,3hqgp-jhw61,No,99s7k-d6f58 -dm3mv-q1b76,10.1038/s41524-019-0216-x,10.22002/D1.1256,yrn11-jb916,No,t899g-xww46 -ahsp6-e8a25,10.1186/s40168-020-0785-4,10.22002/D1.1295,88yzp-h0n85,No,ndyrw-3gq31 -n1n6t-pxs56,10.1038/s41586-021-03601-4,10.22002/D1.1455,m47ts-35f81,No,ydg6m-x6q85 -zjfrp-a9k69,10.1126/science.abb3099,10.22002/D1.1444,s3ejh-8rk72,No,9vgt2-8vy76 -jvzym-1mh07,10.1021/acssynbio.1c00592,10.22002/D1.2140,tvy11-z5a48,No,rff39-mtm48 -r8qh8-y4065,10.1128/JCM.01785-21,10.22002/D1.1942,bv2tf-aap55,No,whd0t-96h94 -nrbae-qk103,10.1098/rspa.2021.0561,10.22002/D1.2173,m1b0w-0zs59,No,6atdb-1bj05 -nrbae-qk103,10.1098/rspa.2021.0561,10.22002/D1.2024,e3sea-97705,No,0x4v2-cb178 -dcas8-y4s76,10.1126/science.abg9765,10.22002/D1.1882,s7epj-5ry14,No,c0e9h-2n643 -k3h8s-vjz03,10.1016/j.atmosenv.2021.118809,10.22002/D1.1985,254mc-zpg74,No,4qwbc-5sb81 -9x8wh-65414,10.7554/eLife.65092,10.22002/D1.2076,k9hkr-v0978,No,v22y2-m7n82 -9x8wh-65414,10.7554/eLife.65092,10.22002/D1.2157,g3sp0-33085,No,9eh2f-k1a76 -9x8wh-65414,10.7554/eLife.65092,10.22002/D1.1790,qxm6c-q5p97,No,qn0ny-t3b46 -15qr1-8d538,10.1029/2019je006190,10.22002/D1.1326,dwc1w-r9a68,No,7gpm7-hb420 -344g6-s0176,10.1007/s12678-021-00694-3,10.22002/D1.1632,1km87-52j70,No,fdx2t-3rx08 -nxrw7-zx123,10.1038/s41561-021-00862-6,10.22002/D1.2150,0a0fg-yer22,No,36zbz-rbn41 -pa1b8-a8596,10.1029/2020gl089917,10.22002/D1.1376,9wpke-7eg08,No,358dc-1ck54 
-ezbzn-3qz02,10.1038/s41567-021-01492-w,10.22002/D1.2202,59zzx-xhb23,No,kcgnj-he106 -hcg0j-pk874,10.1029/2020gl087477,10.22002/D1.1348,j5fk1-0g306,No,wkwk4-8ym88 -40n3g-jae61,10.1029/2017GC007260,10.22002/D1.320,vymbv-n8p13,No,65wey-dqt49 -40n3g-jae61,10.1029/2017GC007260,10.22002/D1.321,bwxz5-0kc80,No,yz4a8-bd988 -5624m-z4s58,10.1029/2019gc008862,10.22002/D1.1380,mm5dq-05y22,No,aekvq-hh376 -8scha-rhv61,10.1029/2018MS001313,10.22002/D1.933,a49j4-qt740,No,w08gs-1t250 -prs7s-hfr18,10.1038/s41524-022-00747-1,10.22002/D1.1983,dekcc-2tb35,No,2g1py-bnf41 -99bn3-pg365,10.7554/eLife.55308,10.22002/d1.1336,wyrrw-ffe20,No,yd65f-wkz21 -99bn3-pg365,10.7554/eLife.55308,10.22002/D1.1331,8ftre-92r60,No,q9xqq-xrk20 -q0pn9-dgp08,10.1029/2021ea001637,10.22002/D1.2028,p6614-mjw68,No,a7rma-wkb68 -q0pn9-dgp08,10.1029/2021ea001637,10.22002/D1.222,k8gnx-7hj46,No,6ffmj-n7192 -jk4kd-j8n07,10.1029/2020gl091699,10.22002/D1.1666,bk8pf-qvx09,No,gv80y-yq992 -zyge8-axx47,10.5194/se-11-2283-2020,10.22002/D1.1447,6jegf-05x08,No,443sq-jh364 -8xx2v-fk653,10.1093/pnasnexus/pgad033,10.22002/D1.20223,0yw13-j0441,No,szsyt-ytf03 -6famx-f0654,10.1038/s41467-020-16224-6,10.22002/D1.1371,60wg2-9qz17,No,v8shp-3a074 -qjzq0-71624,10.1029/2019jb018597,10.22002/D1.1377,nwj4y-hcm74,No,aan3s-zfk64 -nm7th-hgm94,10.1073/pnas.2102267118,10.22002/D1.1996,10svm-aq733,No,47mgd-ydg26 -zfjpk-xtj13,10.1063/5.0006684,10.22002/D1.1379,dxrpq-xyx02,No,jmzz8-6td18 -kp5yc-avm39,10.1038/s41586-019-1447-1,10.22002/D1.1160,w8hpj-0y065,No,6a7xt-crp87 -c9geb-11h23,10.1073/pnas.1907869116,10.22002/D1.1241,fbv6r-hg153,No,cqnnk-chw84 -zqrkn-f3w82,10.1038/s41524-019-0213-0,10.22002/D1.1178,ehp06-pcf04,No,9r1k1-95d28 -zqrkn-f3w82,10.1038/s41524-019-0213-0,10.22002/D1.1179,p2bsg-7tb62,No,pehkc-etn63 -arj9r-nfc42,10.1038/s41467-019-13262-7,10.22002/D1.1296,2vdrb-bmr68,No,26ts2-f1j05 -kz506-yqx70,10.1029/2019JB018922,10.22002/d1.1328,yen0b-fed04,No,vw4ra-qrp13 -gjzvx-agj04,10.1038/s41598-020-58586-3,10.22002/D1.1298,8d1ar-e8e29,No,gepec-02x71 -ztte2-h5j24,10.1088/1367-2630/ac1144,10.22002/D1.1451,hvtvx-rjq36,No,nz1k8-4ck35 -ngtts-mjd88,10.1029/2021jb021976,10.22002/D1.2009,7tp28-jp627,No,bzqfj-f7077 -ngtts-mjd88,10.1029/2021jb021976,10.22002/D1.2010,e80sv-5py92,No,ffmm8-ckg43 -jemmb-kp969,,10.22002/D1.1921,d2qn0-tse31,No,e3rhe-kdq51 -ke1kq-rk838,10.1038/s41467-022-33449-9,10.22002/D1.20291,9bkyr-21532,No,8b5nn-hbt51 -4jqdv-nrv37,10.1016/j.cels.2022.03.002,10.22002/D1.1693,4ry4k-5gf89,No,h2e81-mz738 -rsg2f-2nj92,10.1126/sciadv.adg6618,10.22002/D1.2090,c2w53-e7q49,No,15m2r-ep710 -qgw1r-cg440,10.1111/sed.13100,10.22002/D1.1645,8zehn-8rr62,No,mzph0-txc86 -w2ev0-snj25,10.1029/2021jf006392,10.22002/D1.8962,yrkvm-9bh56,No,d3ek1-z9x67 -750xa-xag63,,10.22002/D1.1286,zcndp-vg341,No,27jch-y5k25 -j80sb-te308,10.1088/1748-3190/ad277f,10.22002/c5cyj-mev09,c5cyj-mev09,No,86x6j-n4q55 -mypge-8d791,10.1038/s41586-023-06227-w,10.22002/7h65h-89163,7h65h-89163,No,4m1z1-bhc25 -90vmt-dcf44,10.1029/2019je006289,10.22002/D1.1349,kmde9-m7g44,No,y28dp-mfh71 -fx2at-3ps68,10.1029/2019jg005533,10.22002/D1.1226,xc9rx-8qs95,No,rbw1s-e1198 -pjjee-1w296,10.1029/2021MS002671,10.22002/D1.1429,3p6y1-a8b95,No,ymygq-mpz35 -79r10-1a091,10.1029/2021jf006406,10.22002/D1.2078,5qqjp-5g813,No,bkqq3-kfa29 -79r10-1a091,10.1029/2021jf006406,10.22002/D1.9200,dgxbk-45k21,No,9ynk4-ajv10 -4a7y9-nzb92,10.1016/j.cels.2022.03.001,10.22002/D1.1692,5e4cj-34824,No,bw0gp-8tt54 -4a7y9-nzb92,10.1016/j.cels.2022.03.001,10.22002/D1.20047,aymp3-qzt70,No,15qm4-77249 
-6k82a-tnj57,10.24349/pjye-gkeo,10.22002/D1.1877,01pg4-5r437,No,h8ps1-0yp52 -1qwbd-2q359,10.1029/2021av000534,10.22002/D1.2176,kasms-vp209,No,e1m0y-cpf40 -dv27e-2sy47,10.1073/pnas.2023433118,10.22002/D1.1667,8rsdg-rxz52,No,kaayd-19r23 -tr0p8-5we23,10.1016/j.cell.2021.11.014,10.22002/D1.1915,653dj-3b761,No,mtzc2-18p79 -tr0p8-5we23,10.1016/j.cell.2021.11.014,10.22002/D1.1900,82zcr-mxa32,No,n1vb7-m1g75 -tr0p8-5we23,10.1016/j.cell.2021.11.014,10.22002/D1.1905,cj7ec-8g780,No,j8p30-za980 -tr0p8-5we23,10.1016/j.cell.2021.11.014,10.22002/D1.1914,5vd8b-sz113,No,8rp1w-qfz76 -tr0p8-5we23,10.1016/j.cell.2021.11.014,10.22002/D1.1919,zh7s1-8g617,No,pqak6-n5333 -tr0p8-5we23,10.1016/j.cell.2021.11.014,10.22002/D1.1917,2t5tj-e5w45,No,w80kf-hyt30 -tr0p8-5we23,10.1016/j.cell.2021.11.014,10.22002/D1.1916,0vgd3-9cp11,No,9w2er-n4713 -tr0p8-5we23,10.1016/j.cell.2021.11.014,10.22002/D1.1918,5x3vq-z1b96,No,zb2dj-d0f86 -h0e90-htt06,10.1029/2021jc018375,10.22002/D1.1296,2vdrb-bmr68,No,26ts2-f1j05 -0f32n-bgm37,10.1029/2021gl095227,10.22002/D1.1347,8hm1f-w5492,No,9tcpx-vf039 -0f32n-bgm37,10.1029/2021gl095227,10.22002/D1.2033,jn6d1-wfj05,No,t8h2f-87s64 -nde62-t1r58,10.1029/2021JB022462,10.22002/D1.1970,h6438-cgy98,No,pbgwc-7b016 -rwr63-xyt69,10.1029/2021gl096503,10.22002/D1.2135,pyr6e-wt732,No,2kvfz-erc91 -r1pva-c8x21,10.1029/2022jb024329,10.22002/D1.20035,wxsas-96g08,No,8tj1d-22780 -rb73g-vqz89,10.1016/j.xcrp.2022.100959,10.22002/D1.20057,bfap4-h2m21,No,px5ss-5y916 -wr8qq-s8t58,10.1109/WHISPERS.2016.8071774,10.22002/D1.222,k8gnx-7hj46,No,6ffmj-n7192 -ynbgx-0tx98,10.1029/2021ms002747,10.22002/D1.1231,meh5c-wy279,No,61shk-rcs26 -6gcmj-reb48,10.1038/s41586-022-04749-3,10.22002/D1.2155,y72mq-emt30,No,5jjz9-40b67 -vpnng-szs82,10.1126/sciadv.abn9545,10.22002/D1.20048,hend5-jzt61,No,sb743-qp239 -1q6gn-mvc46,10.1029/2022ms003105,10.22002/D1.20052,j8mw7-fm491,No,5mdtq-15724 -ck6pf-68621,10.1093/gji/ggab407,10.22002/D1.1955,31emd-wmv98,No,33y3z-2te02 -d20js-7z640,10.1029/2022gl101715,10.22002/vn6v0-pfr77,vn6v0-pfr77,No,dcy6h-wem15 -rsekp-4g847,10.1109/tgrs.2023.3305194,10.22002/D1.1347,8hm1f-w5492,No,9tcpx-vf039 -cmvm1-e9379,10.1029/2023gl105205,10.22002/D1.1347,8hm1f-w5492,No,9tcpx-vf039 -wkjh4-6nf06,10.1101/2022.03.02.22271724,10.22002/D1.20049,pmm08-6q581,No,rtc14-wmr12 -p6mn1-gs660,10.1101/2022.06.17.496478,10.22002/D1.20199,5vng7-8ne78,No,afznk-vbp04 -vtef0-x7037,10.1101/2022.07.13.22277113,10.22002/D1.20223,0yw13-j0441,No,szsyt-ytf03 -q40ve-64h03,10.1002/essoar.10510937.2,10.22002/D1.20052,j8mw7-fm491,No,5mdtq-15724 -4w29e-xpa51,10.1002/essoar.10510458.2,10.22002/D1.1891,n1yye-2z213,No,zeyw2-27p09 -e9yy0-v0658,10.1002/essoar.10511838.1,10.22002/D1.20038,e1acg-e0k08,No,3pg72-ddh27 -x0cjg-kxg25,10.1101/2022.07.13.22277513,10.22002/D1.20223,0yw13-j0441,No,szsyt-ytf03 -jh5g2-fyg97,10.1002/essoar.10512148.1,10.22002/D1.20252,dhfgs-p7319,No,wjhsn-kbv79 -fgbgn-2rk49,10.1002/essoar.10512118.2,10.22002/D1.20248,pybpv-w7661,No,wp4x8-1vc62 -91hvf-49c47,,10.22002/D1.1438,3ejxg-69q72,No,2ths5-hkw28 -yvded-18923,10.1101/2020.07.25.210468,10.22002/D1.1407,fc4k3-75q88,No,c4fh9-f1170 -166f3-tj121,,10.22002/D1.1100,dkdt4-e0x94,No,bcn0f-n7z19 -nb3f9-s8p96,10.1002/essoar.10504190.1,10.22002/D1.1617,4peqr-t0723,No,npzd5-bzb58 -5xkg8-an610,10.1101/762773,10.22002/d1.1276,gye0e-gw976,No,gqpst-s3d05 -24wrg-rx971,10.1101/2021.05.13.444042,10.22002/D1.1915,653dj-3b761,No,mtzc2-18p79 -24wrg-rx971,10.1101/2021.05.13.444042,10.22002/D1.1900,82zcr-mxa32,No,n1vb7-m1g75 -24wrg-rx971,10.1101/2021.05.13.444042,10.22002/D1.1905,cj7ec-8g780,No,j8p30-za980 
-24wrg-rx971,10.1101/2021.05.13.444042,10.22002/D1.1914,5vd8b-sz113,No,8rp1w-qfz76 -24wrg-rx971,10.1101/2021.05.13.444042,10.22002/D1.1917,2t5tj-e5w45,No,w80kf-hyt30 -24wrg-rx971,10.1101/2021.05.13.444042,10.22002/D1.1916,0vgd3-9cp11,No,9w2er-n4713 -24wrg-rx971,10.1101/2021.05.13.444042,10.22002/D1.1918,5x3vq-z1b96,No,zb2dj-d0f86 -2nvtm-7qe34,10.1029/2019je006191,10.22002/d1.1304,dmrkv-3xn63,No,91tsz-cfc02 -m07dw-3jq86,10.1038/s41587-019-0372-z,10.22002/D1.1311,a73n8-3pa89,No,dth8s-nba11 -n3qyx-kf341,10.1029/2019je006156,10.22002/D1.1299,df548-mhy96,No,a15h1-f1j49 -ks7h3-tf644,10.1038/s42005-021-00703-3,10.22002/D1.1858,vp3y4-ef326,No,2f5p2-2e148 -8mp48-93523,,10.22002/D1.1315,d8w02-xng39,No,q0346-mn915 -a7vwv-mhw66,10.1101/2022.07.10.499405,10.22002/D1.20215,awtz3-tz122,No,xfqva-b4991 -h26g5-k0311,10.1029/2018ea000416,10.22002/D1.1211,5ptpj-x8c08,No,7f5x5-pa209 -h26g5-k0311,10.1029/2018ea000416,10.22002/D1.1212,3j8a8-bzn05,No,xk40w-p0g08 -y47b9-pmn34,10.48550/arXiv.1905.06360,10.22002/D1.1241,fbv6r-hg153,No,cqnnk-chw84 -zf2v5-vx810,10.1029/2018JE005706,10.22002/D1.1087,hhzzq-yw058,No,e67v1-3kg31 -zf2v5-vx810,10.1029/2018JE005706,10.22002/D1.1085,44km0-er448,No,g867t-x4d04 -zf2v5-vx810,10.1029/2018JE005706,10.22002/D1.1086,c8fwc-kvg38,No,c1gj1-hnb74 -k6jp7-8js32,10.1029/2019gl086424,10.22002/d1.1317,rdv1n-st737,No,8kgzq-0z165 -se7bz-k9a90,10.1029/2019jb018855,10.22002/D1.1293,mtn3h-frk09,No,hrpxj-0tr80 -frxqw-qyw74,10.1063/1.5054927,10.22002/D1.305,0t2qn-hvq19,No,f3cwe-hgm53 -dzgj9-y8y49,10.3390/rs12213586,10.22002/d1.1182,45st6-jvh02,No,xk1ex-axk10 -ge4t4-7e026,10.1029/2019je006298,10.22002/D1.1318,gyvwk-btq07,No,te8rh-sxq88 -ge4t4-7e026,10.1029/2019je006298,10.22002/D1.1211,5ptpj-x8c08,No,7f5x5-pa209 -ze84b-v1782,10.1101/2020.12.09.20239467,10.22002/D1.1702,24265-gtd53,No,y98md-bp961 -d4h00-xf206,10.1029/2022jb025425,10.22002/D1.20252,dhfgs-p7319,No,wjhsn-kbv79 -x179p-jhv14,10.7554/elife.85370,10.22002/D1.2157,g3sp0-33085,No,9eh2f-k1a76 -ywtpy-nka66,10.1038/s41929-021-00618-w,10.22002/D1.1632,1km87-52j70,No,fdx2t-3rx08 -zwfrv-rbn84,10.1038/s41467-021-25443-4,10.22002/D1.2032,jvjkh-d9g50,No,x6gw8-as345 -y99cf-fkr11,10.1016/j.icarus.2022.115079,10.22002/D1.20170,3ra81-96y32,No,fddq3-zmn81 -ernz6-2xp43,10.1101/2022.03.21.484932,10.22002/D1.20060,hwv5v-m9x76,No,vfz20-ydp76 -rxd0x-ag678,10.1029/2020JE006675,10.22002/D1.1617,4peqr-t0723,No,npzd5-bzb58 -t2ser-vfe73,10.1029/2020ja027796,10.22002/D1.1333,r35r5-sb884,No,ngxcf-0a955 -v84d4-vcs34,10.1038/s41586-020-2872-x,10.22002/D1.1647,jhn25-fsd29,No,24b5z-t4m60 -c03z0-nfg11,10.1038/s41598-020-77073-3,10.22002/D1.1407,fc4k3-75q88,No,c4fh9-f1170 -4fdtn-y2e21,10.1029/2020je006606,10.22002/D1.1628,d5jt1-wqt82,No,wx24e-5yd65 -ct2sc-f7m12,10.1029/2019av000140,10.22002/D1.1347,8hm1f-w5492,No,9tcpx-vf039 -rdqe2-hsq97,10.1029/2020jb021369,10.22002/D1.1670,e200b-xsm06,No,4qv0g-7yt28 -363j8-nw138,10.1038/s41587-021-00870-2,10.22002/D1.1876,7704n-f6m57,No,3gz1s-d5261 -r7rsd-a7a17,,10.22002/D1.2026,51sah-d9r47,No,mn3a1-x3x94 -r7rsd-a7a17,,10.22002/D1.2025,p8ppf-7ff93,No,e1acb-n8a94 -6f3gb-97h58,10.1126/science.abg2947,10.22002/D1.1976,zpzee-79351,No,px1w1-xqr37 -wfg20-4tn76,10.1029/2020tc006210,10.22002/D1.1388,8dhjv-rvf91,No,zwjd2-rw873 -n2b9p-8jf71,10.1029/2021JB021886,10.22002/D1.1612,ryqnw-bdf94,No,mzwax-jw720 -30s9e-9z096,10.1029/2021jb022676,10.22002/D1.2009,7tp28-jp627,No,bzqfj-f7077 -30s9e-9z096,10.1029/2021jb022676,10.22002/D1.2141,dwkqy-hkj69,No,e0zwb-f9s86 -30s9e-9z096,10.1029/2021jb022676,10.22002/D1.2142,665w3-pbj51,No,dygfc-38b42 
-fx63v-hsd80,10.1029/2021gl092598,10.22002/D1.2023,m1r58-kvb98,No,ft45b-8nw98 -vfg98-9zt14,10.5194/gmd-14-6309-2021,10.22002/D1.971,8mkmg-c2938,No,3ydw5-6yx19 -b0xvf-xn162,10.1038/s41561-021-00706-3,10.22002/D1.1874,3hrqe-5x450,No,7m4ep-06188 -b0xvf-xn162,10.1038/s41561-021-00706-3,10.22002/D1.1873,8ydek-yt879,No,m86ym-m3603 -08dr4-w6943,10.1029/2021je006828,10.22002/D1.1971,gv4qf-qwa77,No,mm42q-wft65 -yzjs7-1cv55,10.1016/j.jsb.2022.107860,10.22002/D1.2096,2vtv3-pp862,No,tyest-mem06 -yzjs7-1cv55,10.1016/j.jsb.2022.107860,10.22002/D1.2099,tz8jq-0mk77,No,yypym-x5693 -yzjs7-1cv55,10.1016/j.jsb.2022.107860,10.22002/D1.2103,kfkqj-q6557,No,4khh3-dpp37 -0mmmp-9pz59,10.1029/2023jb026488,10.22002/D1.20248,pybpv-w7661,No,wp4x8-1vc62 -phbx7-m8a69,10.1088/2515-7655/ac817e,10.22002/D1.20061,q9zpw-g8s64,No,skmrx-14a11 -r5hzq-dzy83,10.2110/jsr.2022.032,10.22002/D1.20044,hzwqz-5wr07,No,0y3zw-g1n05 -sar53-81n52,10.1016/j.epsl.2023.118277,10.22002/D1.1619,s6ey4-qpm11,No,ac32t-mff72 -sar53-81n52,10.1016/j.epsl.2023.118277,10.22002/D1.1620,2znta-5t680,No,tka8c-bjp40 -y5gpx-saw63,10.5194/amt-17-5861-2024,10.14291/TCCON.GGG2014,rhrv4-mcp55,No,jn2dg-2h888 -knwz1-dvb78,,10.14291/tccon.ggg2014.pasadena01.r1/1182415,tb378-y1a55,No,8z4qd-mhz12 -dm3mv-q1b76,10.1038/s41524-019-0216-x,10.22002/D1.1256,yrn11-jb916,No,t899g-xww46 diff --git a/ames/matchers/__init__.py b/ames/matchers/__init__.py index a5923f56..69fb09bc 100644 --- a/ames/matchers/__init__.py +++ b/ames/matchers/__init__.py @@ -24,3 +24,4 @@ from .caltechauthors import save_metadata_to_file from .caltechauthors import add_related_identifiers_from_csv from .caltechauthors import add_authors_affiliations +from .caltechauthors import process_link_updates diff --git a/ames/matchers/caltechauthors.py b/ames/matchers/caltechauthors.py index 29fd02af..02017c85 100644 --- a/ames/matchers/caltechauthors.py +++ b/ames/matchers/caltechauthors.py @@ -342,201 +342,158 @@ def move_doi(record, token, test=False): ) -def add_related_identifiers_from_csv(csv_path, test=False): +def add_related_identifiers_from_csv(data_rows, token, test=False): """Reads a CSV file and adds related identifiers to each record using the CaltechDATA API.""" - base_url = ( - "https://data.caltechlibrary.dev" - if test - else "https://data.caltechlibrary.caltech.edu" - ) + base_url = "https://data.caltechlibrary.dev" if test else "https://data.caltechlibrary.caltech.edu" headers = { "Authorization": f"Bearer {token}", "Content-type": "application/json", } + results = [] + for row in data_rows: + record_id = row['Test_ID'] + doi = row['CaltechAUTHORS_DOI'] + caltech_author_id = row['CaltechAUTHORS_ID'] + resource_type = row['resource_type'] + + print(f"\nProcessing Test_ID: {record_id} with DOI: {doi} and CaltechAUTHORS_ID: {caltech_author_id}") + print(f"Using resource_type: {resource_type}") + + # Fetch the current record + response = requests.get(f"{base_url}/api/records/{record_id}", headers=headers) + if response.status_code != 200: + print(f"Error fetching record {record_id}: {response.status_code}") + continue + record_data = response.json() - with open(csv_path, "r") as csvfile: - reader = csv.DictReader(csvfile) - for row in reader: - record_id = row["Test_ID"] - doi = row["CaltechAUTHORS_DOI"] - caltech_author_id = row["CaltechAUTHORS_ID"] - resource_type = row["resource_type"] - - print( - f"\nProcessing Test_ID: {record_id} with DOI: {doi} and CaltechAUTHORS_ID: {caltech_author_id}" - ) - print(f"Using resource_type: {resource_type}") - - # Fetch the current record - response = 
requests.get( - f"{base_url}/api/records/{record_id}", headers=headers - ) - if response.status_code != 200: - print(f"Error fetching record {record_id}: {response.status_code}") + # Draft check or create + draft_response = requests.get(f"{base_url}/api/records/{record_id}/draft", headers=headers) + if draft_response.status_code == 200: + record_data = draft_response.json() + else: + draft_create_response = requests.post(f"{base_url}/api/records/{record_id}/draft", headers=headers) + if draft_create_response.status_code != 201: + print(f"Error creating draft: {draft_create_response.status_code}") continue - record_data = response.json() - - # Draft check or create - draft_response = requests.get( - f"{base_url}/api/records/{record_id}/draft", headers=headers - ) - if draft_response.status_code == 200: - record_data = draft_response.json() - else: - draft_create_response = requests.post( - f"{base_url}/api/records/{record_id}/draft", headers=headers - ) - if draft_create_response.status_code != 201: - print(f"Error creating draft: {draft_create_response.status_code}") - continue - record_data = draft_create_response.json() - - related_identifiers = ( - record_data.get("metadata", {}).get("related_identifiers", []) or [] - ) + record_data = draft_create_response.json() + + related_identifiers = record_data.get("metadata", {}).get("related_identifiers", []) or [] + + doi_exists = any(ri.get("identifier") == doi for ri in related_identifiers) + author_url = f"https://authors.library.caltech.edu/records/{caltech_author_id}" + author_url_exists = any(ri.get("identifier") == author_url for ri in related_identifiers) + + if not doi_exists: + related_identifiers.append({ + "relation_type": {"id": "issupplementedby"}, + "identifier": doi, + "scheme": "doi", + "resource_type": {"id": resource_type} + }) + print(f"Adding DOI: {doi}") + else: + print(f"DOI already exists") + + if not author_url_exists: + related_identifiers.append({ + "relation_type": {"id": "isreferencedby"}, + "identifier": author_url, + "scheme": "url", + "resource_type": {"id": resource_type} + }) + print(f"Adding CaltechAUTHORS link: {author_url}") + else: + print(f"CaltechAUTHORS link already exists") - doi_exists = any(ri.get("identifier") == doi for ri in related_identifiers) - author_url = ( - f"https://authors.library.caltech.edu/records/{caltech_author_id}" - ) - author_url_exists = any( - ri.get("identifier") == author_url for ri in related_identifiers - ) + record_data["metadata"]["related_identifiers"] = related_identifiers - if not doi_exists: - related_identifiers.append( - { - "relation_type": {"id": "issupplementedby"}, - "identifier": doi, - "scheme": "doi", - "resource_type": {"id": resource_type}, - } - ) - print(f"Adding DOI: {doi}") - else: - print(f"DOI already exists") - - if not author_url_exists: - related_identifiers.append( - { - "relation_type": {"id": "isreferencedby"}, - "identifier": author_url, - "scheme": "url", - "resource_type": {"id": resource_type}, - } - ) - print(f"Adding CaltechAUTHORS link: {author_url}") - else: - print(f"CaltechAUTHORS link already exists") + update_response = requests.put( + f"{base_url}/api/records/{record_id}/draft", headers=headers, json=record_data + ) + if update_response.status_code != 200: + print(f"Error updating draft: {update_response.status_code}") + continue - record_data["metadata"]["related_identifiers"] = related_identifiers + publish_response = requests.post( + f"{base_url}/api/records/{record_id}/draft/actions/publish", headers=headers + ) + if 
publish_response.status_code != 202: + print(f"Error publishing record {record_id}: {publish_response.status_code}") + results.append((record_id, False)) + continue - update_response = requests.put( - f"{base_url}/api/records/{record_id}/draft", - headers=headers, - json=record_data, - ) - if update_response.status_code != 200: - print(f"Error updating draft: {update_response.status_code}") - continue + print(f"Successfully updated and published {record_id}") + results.append((record_id, True)) + return results - publish_response = requests.post( - f"{base_url}/api/records/{record_id}/draft/actions/publish", - headers=headers, +def process_link_updates(input_csv): + # read the CSV file and build a dictionary: record_id -> {"links": [(link, classification), ...]} + records_data = {} + with open(input_file, newline="") as f: + reader = csv.DictReader(f, delimiter=",") + for row in reader: + record_id = row["record_id"].strip() + link = row["link"].strip() + classification = row["classification"].strip() + + if record_id not in records_data: + records_data[record_id] = { + "links": [] + } + records_data[record_id]["links"].append((link, classification)) + + results = [] + + for record_id, record_info in records_data.items(): + print(f"Processing record {record_id}") + + # get metadata for the record + metadata = get_record_metadata(record_id) + if not metadata: + # if we failed to get metadata, record the error and continue + first_link = record_info["links"][0][0] if record_info["links"] else "" + results.append( + { + "record_id": record_id, + "link": first_link, + "doi_check": None, + "metadata_updated": False, + "notes": "Failed to retrieve metadata", + } ) - if publish_response.status_code != 202: - print( - f"Error publishing record {record_id}: {publish_response.status_code}" - ) - continue - - print(f"Successfully updated and published {record_id}") - - print("All records processed.") - - -def add_authors_affiliations(record, token, dimensions_key, allowed_identifiers=None): - # Add dimensions affiliations to a record + continue - record_id = record["id"] - if "doi" in record["pids"]: - doi = record["pids"]["doi"]["identifier"] - else: - doi = None - if "identifiers" in record["metadata"]: - for idv in record["metadata"]["identifiers"]: - if idv["scheme"] == "doi": - doi = idv["identifier"] - if doi: - endpoint = "https://cris-api.dimensions.ai/v3" - dimcli.login(key=dimensions_key, endpoint=endpoint, verbose=False) - dsl = dimcli.Dsl() - res = dsl.query_iterative( - f""" - search publications - where doi = "{doi}" - return publications[basics+extras+abstract] """, - verbose=False, + # check existing related identifiers in the record + related_identifiers = metadata.get("metadata", {}).get("related_identifiers", []) + + # run check_doi if a "doi" is present among the links + doi_check = None + for (lk, ctype) in record_info["links"]: + if ctype.lower() == "doi": + try: + doi_check = check_doi(lk, production=True) + except Exception as e: + doi_check = f"Error: {str(e)}" + + # update related identifiers + updated_metadata, updated_flag = update_related_identifiers( + metadata, record_info["links"], source_type="data" ) - publication = res.json["publications"] - update = False - if len(publication) == 1: - publication = publication[0] - dimensions_authors = publication.get("authors", []) - existing_authors = record["metadata"]["creators"] - if len(dimensions_authors) == len(existing_authors): - for position in range(len(dimensions_authors)): - author = existing_authors[position] - 
dimensions_author = dimensions_authors[position] - if "affiliations" not in author: - affiliations = [] - affiliation_ids = [] - if dimensions_author["affiliations"] not in [[], None]: - for affiliation in dimensions_author["affiliations"]: - affil = {} - if "id" in affiliation: - if affiliation["id"] is not None: - ror = grid_to_ror(affiliation["id"]) - if ror is not None: - if allowed_identifiers is not None: - if ror in allowed_identifiers: - affil["id"] = ror - else: - print( - "ROR %s not in allowed identifiers list" - % ror - ) - else: - print( - "Missing ROR for affiliation %s" - % affiliation["id"] - ) - # We have to manually handle incorrectly mapped JPL - # affiliations - if "raw_affiliation" in affiliation: - raw = affiliation["raw_affiliation"] - affil["name"] = raw - if "91109" in raw: - affil["id"] = "027k65916" - if "Jet Propulsion Laboratory" in raw: - affil["id"] = "027k65916" - if "JPL" in raw: - affil["id"] = "027k65916" - # Some dimensions records don't include id values. - # We ignore those for now - if "id" in affil: - if affil["id"] not in affiliation_ids: - update = True - affiliation_ids.append(affil["id"]) - affiliations.append(affil) - existing_authors[position]["affiliations"] = affiliations - if update: - caltechdata_edit( - record_id, - metadata=record, - token=token, - production=True, - publish=True, - authors=True, - ) + if updated_flag: + # saving to local JSON file for reference + save_metadata_to_file(updated_metadata, record_id) + pass + + # preparing the final row for the results CSV + first_link = record_info["links"][0][0] if record_info["links"] else "" + results.append( + { + "record_id": record_id, + "link": first_link, + "doi_check": doi_check, + "metadata_updated": updated_flag, + "notes": "", + } + ) + return results diff --git a/run_caltechauthors_get_links.py b/run_caltechauthors_get_links.py new file mode 100644 index 00000000..cd486026 --- /dev/null +++ b/run_caltechauthors_get_links.py @@ -0,0 +1,16 @@ +from ames.harvesters.caltechauthors import get_data_availability_links +import csv +import os + +output_file = "test_results_get_links.csv" +token = os.environ.get("RDMTOK") +results = get_data_availability_links(token=token) + +if results: + with open(output_file, "w", newline="") as f: + writer = csv.DictWriter(f, fieldnames=results[0].keys()) + writer.writeheader() + writer.writerows(results) + print(f"Saved {len(results)} links to {output_file}") +else: + print("No results.") diff --git a/run_caltechauthors_update_links.py b/run_caltechauthors_update_links.py new file mode 100644 index 00000000..7f58fc48 --- /dev/null +++ b/run_caltechauthors_update_links.py @@ -0,0 +1,16 @@ +from ames.matchers.caltechauthors import process_link_updates +import csv + +input_file = "non_publisher_links.csv" +output_file = "test_results_update_links.csv" + +results = process_link_updates(input_file) + +if results: + with open(output_file, "w", newline="") as f: + writer = csv.DictWriter(f, fieldnames=results[0].keys()) + writer.writeheader() + writer.writerows(results) + print(f"Saved update results to {output_file}") +else: + print("No results.") diff --git a/run_harvest_links.py b/run_harvest_links.py new file mode 100644 index 00000000..3d71948c --- /dev/null +++ b/run_harvest_links.py @@ -0,0 +1,88 @@ +import os +import csv +import json +import requests + +from ames.matchers.caltechauthors import ( + get_record_metadata, + update_related_identifiers, + save_metadata_to_file, + check_doi, +) + + +def main(): + input_file = "non_publisher_links.csv" + 
output_file = "test_results_matchers.csv" + + # read the CSV file and build a dictionary: record_id -> {"links": [(link, classification), ...]} + records_data = {} + with open(input_file, newline="") as f: + reader = csv.DictReader(f, delimiter=",") + for row in reader: + record_id = row["record_id"].strip() + link = row["link"].strip() + classification = row["classification"].strip() + + if record_id not in records_data: + records_data[record_id] = { + "links": [] + } + records_data[record_id]["links"].append((link, classification)) + + results = [] + + for record_id, record_info in records_data.items(): + print(f"Processing record {record_id}") + + # get metadata for the record + metadata = get_record_metadata(record_id) + if not metadata: + # if we failed to get metadata, record the error and continue + first_link = record_info["links"][0][0] if record_info["links"] else "" + results.append( + { + "record_id": record_id, + "link": first_link, + "doi_check": None, + "metadata_updated": False, + "notes": "Failed to retrieve metadata", + } + ) + continue + + # check existing related identifiers in the record + related_identifiers = metadata.get("metadata", {}).get("related_identifiers", []) + + # run check_doi if a "doi" is present among the links + doi_check = None + for (lk, ctype) in record_info["links"]: + if ctype.lower() == "doi": + try: + doi_check = check_doi(lk, production=True) + except Exception as e: + doi_check = f"Error: {str(e)}" + + # update related identifiers + updated_metadata, updated_flag = update_related_identifiers( + metadata, record_info["links"], source_type="data" + ) + if updated_flag: + # saving to local JSON file for reference + save_metadata_to_file(updated_metadata, record_id) + pass + + # preparing the final row for the results CSV + first_link = record_info["links"][0][0] if record_info["links"] else "" + results.append( + { + "record_id": record_id, + "link": first_link, + "doi_check": doi_check, + "metadata_updated": updated_flag, + "notes": "", + } + ) + +if __name__ == "__main__": + main() diff --git a/tests/test_matchers.py b/tests/test_matchers.py new file mode 100644 index 00000000..9e8c5840 --- /dev/null +++ b/tests/test_matchers.py @@ -0,0 +1,76 @@ +import os +import unittest +import csv +import random +import requests +import sys + +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) + +from ames.matchers.caltechauthors import add_related_identifiers_from_csv + +token = "0UrVehnBSM9c7DQZZCM2EtB4lpuEwbTiLue6rf0Vme3lnzswlMA9whjJbmhX" +CSV_FILE = "test.csv" + +def load_test_data(from_csv=True): + if from_csv and os.path.exists(CSV_FILE): + with open(CSV_FILE, 'r') as f: + reader = csv.DictReader(f) + return list(reader) + else: + return [{ + "CaltechAUTHORS_ID": "bwww3-z8y74", + "CaltechAUTHORS_DOI": f"10.1093/mnras/staa{random.randint(1000, 9999)}", + "Related_DOI": "10.22002/D1.1458", + "Data_ID": "3hqgp-jhw61", + "Cross_Link": "No", + "Test_ID": "99s7k-d6f58", + "resource_type": "publication-article" + }] + +def verify_related_identifiers_on_site(data_rows, test=False): + base_url = "https://data.caltechlibrary.dev" if test else "https://data.caltechlibrary.caltech.edu" + headers = {"Authorization": f"Bearer {token}"} + results = [] + + for row in data_rows: + record_id = row['Test_ID'] + doi = row['CaltechAUTHORS_DOI'] + caltech_author_id = row['CaltechAUTHORS_ID'] + author_url = f"https://authors.library.caltech.edu/records/{caltech_author_id}" + + r = requests.get(f"{base_url}/api/records/{record_id}", 
headers=headers) + if r.status_code != 200: + print(f"❌ Could not fetch record {record_id}") + results.append((record_id, False)) + continue + + metadata = r.json().get("metadata", {}) + related = metadata.get("related_identifiers", []) + found_doi = any(x["identifier"] == doi for x in related) + found_author = any(x["identifier"] == author_url for x in related) + + if found_doi and found_author: + print(f"✅ Verified: {record_id}") + results.append((record_id, True)) + else: + print(f"❌ Verification failed: {record_id}") + results.append((record_id, False)) + + return results + +class TestCaltechDataUploader(unittest.TestCase): + + def test_add_and_verify_related_identifiers(self): + test_data = load_test_data(from_csv=False) # <-- change this flag to toggle source + upload_results = add_related_identifiers_from_csv(test_data, token, test=True) + for record_id, success in upload_results: + self.assertTrue(success, f"❌ Upload failed for record {record_id}") + + verify_results = verify_related_identifiers_on_site(test_data, test=True) + for record_id, success in verify_results: + self.assertTrue(success, f"❌ Verification failed for record {record_id}") + + +if __name__ == "__main__": + unittest.main() From 25b94d800336e2f9063ee02362785c9fdd2592ae Mon Sep 17 00:00:00 2001 From: RohanBhattaraiNP Date: Mon, 19 May 2025 09:56:39 +0000 Subject: [PATCH 03/19] Add updated CITATION.cff from codemeta.json file --- CITATION.cff | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CITATION.cff b/CITATION.cff index 4c17ad48..5900c905 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -21,4 +21,4 @@ keywords: - GitHub - metadata - software -date-released: 2025-05-07 +date-released: 2025-05-19 From d9318e55d30a0e3522184c24a135883ebd737354 Mon Sep 17 00:00:00 2001 From: Rohan Bhattarai Date: Mon, 19 May 2025 14:44:34 -0700 Subject: [PATCH 04/19] Black_Formatting --- add_orcid_script.py | 8 +- ames/harvesters/caltechauthors.py | 128 ++++++++++++++++--------- ames/matchers/caltechauthors.py | 83 ++++++++++------ run_archives_report.py | 5 +- run_authors_affiliation_enhancement.py | 12 +-- run_authors_group_report.py | 6 +- run_authors_name_update.py | 2 +- run_caltechauthors_harvestors.py | 28 +++--- run_caltechauthors_matchers.py | 13 +-- run_harvest_links.py | 13 +-- tests/test_matchers.py | 43 +++++---- 11 files changed, 211 insertions(+), 130 deletions(-) diff --git a/add_orcid_script.py b/add_orcid_script.py index 92d0d6f4..0bd59160 100644 --- a/add_orcid_script.py +++ b/add_orcid_script.py @@ -1,9 +1,11 @@ -import csv,os +import csv, os -with open('orcids.csv', 'r') as f: +with open("orcids.csv", "r") as f: reader = csv.reader(f) orcid_list = list(reader) for orcid_data in orcid_list: orcid = orcid_data[8] clpid = orcid_data[10] - os.system(f'python run_authors_name_update.py {clpid} {orcid} -add -new-scheme orcid') + os.system( + f"python run_authors_name_update.py {clpid} {orcid} -add -new-scheme orcid" + ) diff --git a/ames/harvesters/caltechauthors.py b/ames/harvesters/caltechauthors.py index 326d73d9..b68fd60d 100644 --- a/ames/harvesters/caltechauthors.py +++ b/ames/harvesters/caltechauthors.py @@ -205,9 +205,7 @@ def get_author_records( query = f'?q=metadata.creators.person_or_org.identifiers.identifier%3A"{author_identifier}"' if date: - query += ( - f"%20AND%20metadata.publication_date%3A%5B{date}%20TO%20%2A%20%5D" - ) + query += f"%20AND%20metadata.publication_date%3A%5B{date}%20TO%20%2A%20%5D" if token: headers = { @@ -334,6 +332,7 @@ def get_records_from_date(date="2023-08-25", 
test=False): return hits + def doi2url(doi): if not doi.startswith("10."): return doi @@ -351,6 +350,7 @@ def doi2url(doi): return resolved_url return doi + def fetch_metadata(record_id): url = f"https://authors.library.caltech.edu/api/records/{record_id}" try: @@ -360,11 +360,12 @@ def fetch_metadata(record_id): except: return None + def search_resource_type(obj): if isinstance(obj, dict): for k, v in obj.items(): - if k == 'resource_type' and isinstance(v, dict) and 'id' in v: - return v['id'] + if k == "resource_type" and isinstance(v, dict) and "id" in v: + return v["id"] result = search_resource_type(v) if result: return result @@ -375,8 +376,10 @@ def search_resource_type(obj): return result return None + def fetch_resource_type(data): - return search_resource_type(data) or 'N/A' + return search_resource_type(data) or "N/A" + def search_records(prefix): base_url = "https://authors.library.caltech.edu/api/records" @@ -387,6 +390,7 @@ def search_records(prefix): return response.json() return None + def extract_data_citations(hits): citations = [] for hit in hits: @@ -395,38 +399,50 @@ def extract_data_citations(hits): if not metadata: continue - caltechauthors_doi = metadata.get("pids", {}).get("doi", {}).get("identifier", "") + caltechauthors_doi = ( + metadata.get("pids", {}).get("doi", {}).get("identifier", "") + ) resource_type = fetch_resource_type(metadata) related_dois = [] for identifier in metadata.get("metadata", {}).get("related_identifiers", []): if identifier.get("scheme") == "doi": doi = identifier["identifier"] - if any(doi.startswith(prefix) for prefix in ["10.22002/", "10.14291/", "10.25989/"]): + if any( + doi.startswith(prefix) + for prefix in ["10.22002/", "10.14291/", "10.25989/"] + ): related_dois.append(doi) for doi in related_dois: caltechdata_url = doi2url(doi) if "data.caltech.edu/records/" in caltechdata_url: caltechdata_id = caltechdata_url.split("/records/")[-1] - caltechdata_metadata = requests.get(f"https://data.caltech.edu/api/records/{caltechdata_id}").json() + caltechdata_metadata = requests.get( + f"https://data.caltech.edu/api/records/{caltechdata_id}" + ).json() cross_link = "No" - for identifier in caltechdata_metadata.get("metadata", {}).get("related_identifiers", []): + for identifier in caltechdata_metadata.get("metadata", {}).get( + "related_identifiers", [] + ): if identifier.get("identifier") == caltechauthors_doi: cross_link = "Yes" break - citations.append({ - "CaltechAUTHORS_ID": record_id, - "CaltechAUTHORS_DOI": caltechauthors_doi, - "Related_DOI": doi, - "CaltechDATA_ID": caltechdata_id, - "Cross_Link": cross_link, - "resource_type": resource_type - }) + citations.append( + { + "CaltechAUTHORS_ID": record_id, + "CaltechAUTHORS_DOI": caltechauthors_doi, + "Related_DOI": doi, + "CaltechDATA_ID": caltechdata_id, + "Cross_Link": cross_link, + "resource_type": resource_type, + } + ) return citations + def generate_data_citation_csv(): prefixes = ["10.22002", "10.14291", "10.25989"] all_citations = [] @@ -439,50 +455,66 @@ def generate_data_citation_csv(): output_file = "data_citations_with_type.csv" with open(output_file, "w", newline="") as f: writer = csv.writer(f) - writer.writerow(["CaltechAUTHORS_ID", "CaltechAUTHORS_DOI", "Related_DOI", "CaltechDATA_ID", "Cross_Link", "resource_type"]) + writer.writerow( + [ + "CaltechAUTHORS_ID", + "CaltechAUTHORS_DOI", + "Related_DOI", + "CaltechDATA_ID", + "Cross_Link", + "resource_type", + ] + ) for citation in all_citations: - writer.writerow([ - citation["CaltechAUTHORS_ID"], - 
citation["CaltechAUTHORS_DOI"], - citation["Related_DOI"], - citation["CaltechDATA_ID"], - citation["Cross_Link"], - citation["resource_type"] - ]) + writer.writerow( + [ + citation["CaltechAUTHORS_ID"], + citation["CaltechAUTHORS_DOI"], + citation["Related_DOI"], + citation["CaltechDATA_ID"], + citation["Cross_Link"], + citation["resource_type"], + ] + ) print(f"Saved {len(all_citations)} citations to {output_file}") + def get_data_availability_links(token=None, size=25): base_url = "https://authors.library.caltech.edu/api/records?q=metadata.additional_descriptions.type.id%3A%22data-availability%22&size=25&sort=bestmatch" - base_file_url_template = "https://authors.library.caltech.edu/api/records/{record_id}/files" - + base_file_url_template = ( + "https://authors.library.caltech.edu/api/records/{record_id}/files" + ) + token = os.environ.get("RDMTOK") - + output_file = "test_results_harvesters.csv" - + headers = {} if token: headers = { "Authorization": f"Bearer {token}", "Content-type": "application/json", } - + response = requests.get(base_url, headers=headers) if response.status_code != 200: - print(f"Error: Unable to fetch records from the API. Status code: {response.status_code}") + print( + f"Error: Unable to fetch records from the API. Status code: {response.status_code}" + ) exit() - + records = response.json().get("hits", {}).get("hits", []) - + if not records: print("No records found.") exit() - + results = [] for record in records: record_id = record.get("id") links = record.get("metadata", {}).get("additional_descriptions", []) - + for link_data in links: description = link_data.get("description", "") links_in_description = extract_https_links(description) @@ -491,14 +523,16 @@ def get_data_availability_links(token=None, size=25): cleaned = clean_link(link) filename = extract_filename_from_link(link) file_present = is_file_present(record_id, filename) - - results.append({ - "record_id": record_id, - "original_link": link, - "classification": classification, - "cleaned_link": cleaned, - "filename": filename, - "file_present": file_present - }) - + + results.append( + { + "record_id": record_id, + "original_link": link, + "classification": classification, + "cleaned_link": cleaned, + "filename": filename, + "file_present": file_present, + } + ) + return results diff --git a/ames/matchers/caltechauthors.py b/ames/matchers/caltechauthors.py index 02017c85..c41dbf9d 100644 --- a/ames/matchers/caltechauthors.py +++ b/ames/matchers/caltechauthors.py @@ -345,19 +345,25 @@ def move_doi(record, token, test=False): def add_related_identifiers_from_csv(data_rows, token, test=False): """Reads a CSV file and adds related identifiers to each record using the CaltechDATA API.""" - base_url = "https://data.caltechlibrary.dev" if test else "https://data.caltechlibrary.caltech.edu" + base_url = ( + "https://data.caltechlibrary.dev" + if test + else "https://data.caltechlibrary.caltech.edu" + ) headers = { "Authorization": f"Bearer {token}", "Content-type": "application/json", } results = [] for row in data_rows: - record_id = row['Test_ID'] - doi = row['CaltechAUTHORS_DOI'] - caltech_author_id = row['CaltechAUTHORS_ID'] - resource_type = row['resource_type'] + record_id = row["Test_ID"] + doi = row["CaltechAUTHORS_DOI"] + caltech_author_id = row["CaltechAUTHORS_ID"] + resource_type = row["resource_type"] - print(f"\nProcessing Test_ID: {record_id} with DOI: {doi} and CaltechAUTHORS_ID: {caltech_author_id}") + print( + f"\nProcessing Test_ID: {record_id} with DOI: {doi} and CaltechAUTHORS_ID: 
{caltech_author_id}" + ) print(f"Using resource_type: {resource_type}") # Fetch the current record @@ -368,40 +374,52 @@ def add_related_identifiers_from_csv(data_rows, token, test=False): record_data = response.json() # Draft check or create - draft_response = requests.get(f"{base_url}/api/records/{record_id}/draft", headers=headers) + draft_response = requests.get( + f"{base_url}/api/records/{record_id}/draft", headers=headers + ) if draft_response.status_code == 200: record_data = draft_response.json() else: - draft_create_response = requests.post(f"{base_url}/api/records/{record_id}/draft", headers=headers) + draft_create_response = requests.post( + f"{base_url}/api/records/{record_id}/draft", headers=headers + ) if draft_create_response.status_code != 201: print(f"Error creating draft: {draft_create_response.status_code}") continue record_data = draft_create_response.json() - related_identifiers = record_data.get("metadata", {}).get("related_identifiers", []) or [] + related_identifiers = ( + record_data.get("metadata", {}).get("related_identifiers", []) or [] + ) doi_exists = any(ri.get("identifier") == doi for ri in related_identifiers) author_url = f"https://authors.library.caltech.edu/records/{caltech_author_id}" - author_url_exists = any(ri.get("identifier") == author_url for ri in related_identifiers) + author_url_exists = any( + ri.get("identifier") == author_url for ri in related_identifiers + ) if not doi_exists: - related_identifiers.append({ - "relation_type": {"id": "issupplementedby"}, - "identifier": doi, - "scheme": "doi", - "resource_type": {"id": resource_type} - }) + related_identifiers.append( + { + "relation_type": {"id": "issupplementedby"}, + "identifier": doi, + "scheme": "doi", + "resource_type": {"id": resource_type}, + } + ) print(f"Adding DOI: {doi}") else: print(f"DOI already exists") if not author_url_exists: - related_identifiers.append({ - "relation_type": {"id": "isreferencedby"}, - "identifier": author_url, - "scheme": "url", - "resource_type": {"id": resource_type} - }) + related_identifiers.append( + { + "relation_type": {"id": "isreferencedby"}, + "identifier": author_url, + "scheme": "url", + "resource_type": {"id": resource_type}, + } + ) print(f"Adding CaltechAUTHORS link: {author_url}") else: print(f"CaltechAUTHORS link already exists") @@ -409,7 +427,9 @@ def add_related_identifiers_from_csv(data_rows, token, test=False): record_data["metadata"]["related_identifiers"] = related_identifiers update_response = requests.put( - f"{base_url}/api/records/{record_id}/draft", headers=headers, json=record_data + f"{base_url}/api/records/{record_id}/draft", + headers=headers, + json=record_data, ) if update_response.status_code != 200: print(f"Error updating draft: {update_response.status_code}") @@ -419,7 +439,9 @@ def add_related_identifiers_from_csv(data_rows, token, test=False): f"{base_url}/api/records/{record_id}/draft/actions/publish", headers=headers ) if publish_response.status_code != 202: - print(f"Error publishing record {record_id}: {publish_response.status_code}") + print( + f"Error publishing record {record_id}: {publish_response.status_code}" + ) results.append((record_id, False)) continue @@ -427,6 +449,7 @@ def add_related_identifiers_from_csv(data_rows, token, test=False): results.append((record_id, True)) return results + def process_link_updates(input_csv): # read the CSV file and build a dictionary: record_id -> {"links": [(link, classification), ...]} records_data = {} @@ -438,9 +461,7 @@ def process_link_updates(input_csv): 
classification = row["classification"].strip() if record_id not in records_data: - records_data[record_id] = { - "links": [] - } + records_data[record_id] = {"links": []} records_data[record_id]["links"].append((link, classification)) results = [] @@ -465,11 +486,13 @@ def process_link_updates(input_csv): continue # check existing related identifiers in the record - related_identifiers = metadata.get("metadata", {}).get("related_identifiers", []) + related_identifiers = metadata.get("metadata", {}).get( + "related_identifiers", [] + ) # run check_doi if a "doi" is present among the links doi_check = None - for (lk, ctype) in record_info["links"]: + for lk, ctype in record_info["links"]: if ctype.lower() == "doi": try: doi_check = check_doi(lk, production=True) @@ -478,7 +501,7 @@ def process_link_updates(input_csv): # update related identifiers updated_metadata, updated_flag = update_related_identifiers( - metadata, record_info["links"], source_type="data" + metadata, record_info["links"], source_type="data" ) if updated_flag: # saving to local JSON file for reference diff --git a/run_archives_report.py b/run_archives_report.py index ebd0e69c..0e591fc2 100644 --- a/run_archives_report.py +++ b/run_archives_report.py @@ -139,6 +139,7 @@ def block_fields(): "text_4", ] + def accession_format_report(file_obj, repo, aspace, subject=None, years=None): fields = [ "title", @@ -187,7 +188,9 @@ def accession_report(file_obj, repo, aspace, subject=None, years=None): print(f"subject {subject} not found") exit() print(f"Requesting accessions") - file_obj.writerow(["title","identifier","accession_date","agent"] + block_fields()) + file_obj.writerow( + ["title", "identifier", "accession_date", "agent"] + block_fields() + ) for acc in repo.accessions: for uri in acc.subjects: if search_uri == uri.ref: diff --git a/run_authors_affiliation_enhancement.py b/run_authors_affiliation_enhancement.py index 9789bdfd..663d4ba6 100644 --- a/run_authors_affiliation_enhancement.py +++ b/run_authors_affiliation_enhancement.py @@ -23,13 +23,13 @@ args = parser.parse_args() author_identifier = args.author_identifier -#to_update = [get_metadata('6dmax-vx632',authors=True)] +# to_update = [get_metadata('6dmax-vx632',authors=True)] to_update = get_author_records(author_identifier, token, all_metadata=True) for record in to_update: add_authors_affiliations( - record, - token, - dimensions_key, - allowed_identifiers=ror, - ) + record, + token, + dimensions_key, + allowed_identifiers=ror, + ) diff --git a/run_authors_group_report.py b/run_authors_group_report.py index c91e1aa5..529e1b49 100644 --- a/run_authors_group_report.py +++ b/run_authors_group_report.py @@ -3,15 +3,15 @@ group_identifier = sys.argv[1] -#outfile = open(f"{group_identifier}_report.csv", "w") -#writer = csv.writer(outfile) +# outfile = open(f"{group_identifier}_report.csv", "w") +# writer = csv.writer(outfile) to_update = get_group_records(group_identifier) outfile = open(f"{group_identifier}_report.json", "w") outfile.write(json.dumps(to_update, indent=4)) -#for record in to_update: +# for record in to_update: # if "doi" not in record["pids"]: # metadata = record["metadata"] # publisher = "" diff --git a/run_authors_name_update.py b/run_authors_name_update.py index b1228f36..d1a6414b 100644 --- a/run_authors_name_update.py +++ b/run_authors_name_update.py @@ -20,7 +20,7 @@ old_identifier = args.old_identifier new_identifier = args.new_identifier -to_update = get_author_records(old_identifier,token) +to_update = get_author_records(old_identifier, token) 
for record in to_update: if args.add: edit_author_identifier( diff --git a/run_caltechauthors_harvestors.py b/run_caltechauthors_harvestors.py index dc217f52..3e43d1a5 100644 --- a/run_caltechauthors_harvestors.py +++ b/run_caltechauthors_harvestors.py @@ -6,11 +6,13 @@ extract_https_links, clean_link, extract_filename_from_link, - is_file_present + is_file_present, ) base_url = "https://authors.library.caltech.edu/api/records?q=metadata.additional_descriptions.type.id%3A%22data-availability%22&size=25&sort=bestmatch" -base_file_url_template = "https://authors.library.caltech.edu/api/records/{record_id}/files" +base_file_url_template = ( + "https://authors.library.caltech.edu/api/records/{record_id}/files" +) token = os.environ.get("RDMTOK") @@ -25,7 +27,9 @@ response = requests.get(base_url, headers=headers) if response.status_code != 200: - print(f"Error: Unable to fetch records from the API. Status code: {response.status_code}") + print( + f"Error: Unable to fetch records from the API. Status code: {response.status_code}" + ) exit() records = response.json().get("hits", {}).get("hits", []) @@ -48,14 +52,16 @@ filename = extract_filename_from_link(link) file_present = is_file_present(record_id, filename) - results.append({ - "record_id": record_id, - "original_link": link, - "classification": classification, - "cleaned_link": cleaned, - "filename": filename, - "file_present": file_present - }) + results.append( + { + "record_id": record_id, + "original_link": link, + "classification": classification, + "cleaned_link": cleaned, + "filename": filename, + "file_present": file_present, + } + ) if results: with open(output_file, "w", newline="") as f: diff --git a/run_caltechauthors_matchers.py b/run_caltechauthors_matchers.py index 3d71948c..a5eb361a 100644 --- a/run_caltechauthors_matchers.py +++ b/run_caltechauthors_matchers.py @@ -25,9 +25,7 @@ def main(): classification = row["classification"].strip() if record_id not in records_data: - records_data[record_id] = { - "links": [] - } + records_data[record_id] = {"links": []} records_data[record_id]["links"].append((link, classification)) results = [] @@ -52,11 +50,13 @@ def main(): continue # check existing related identifiers in the record - related_identifiers = metadata.get("metadata", {}).get("related_identifiers", []) + related_identifiers = metadata.get("metadata", {}).get( + "related_identifiers", [] + ) # run check_doi if a "doi" is present among the links doi_check = None - for (lk, ctype) in record_info["links"]: + for lk, ctype in record_info["links"]: if ctype.lower() == "doi": try: doi_check = check_doi(lk, production=True) @@ -65,7 +65,7 @@ def main(): # update related identifiers updated_metadata, updated_flag = update_related_identifiers( - metadata, record_info["links"], source_type="data" + metadata, record_info["links"], source_type="data" ) if updated_flag: # saving to local JSON file for reference @@ -84,5 +84,6 @@ def main(): } ) + if __name__ == "__main__": main() diff --git a/run_harvest_links.py b/run_harvest_links.py index 3d71948c..a5eb361a 100644 --- a/run_harvest_links.py +++ b/run_harvest_links.py @@ -25,9 +25,7 @@ def main(): classification = row["classification"].strip() if record_id not in records_data: - records_data[record_id] = { - "links": [] - } + records_data[record_id] = {"links": []} records_data[record_id]["links"].append((link, classification)) results = [] @@ -52,11 +50,13 @@ def main(): continue # check existing related identifiers in the record - related_identifiers = metadata.get("metadata", 
{}).get("related_identifiers", []) + related_identifiers = metadata.get("metadata", {}).get( + "related_identifiers", [] + ) # run check_doi if a "doi" is present among the links doi_check = None - for (lk, ctype) in record_info["links"]: + for lk, ctype in record_info["links"]: if ctype.lower() == "doi": try: doi_check = check_doi(lk, production=True) @@ -65,7 +65,7 @@ def main(): # update related identifiers updated_metadata, updated_flag = update_related_identifiers( - metadata, record_info["links"], source_type="data" + metadata, record_info["links"], source_type="data" ) if updated_flag: # saving to local JSON file for reference @@ -84,5 +84,6 @@ def main(): } ) + if __name__ == "__main__": main() diff --git a/tests/test_matchers.py b/tests/test_matchers.py index 9e8c5840..bc6d7b24 100644 --- a/tests/test_matchers.py +++ b/tests/test_matchers.py @@ -5,38 +5,46 @@ import requests import sys -sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) from ames.matchers.caltechauthors import add_related_identifiers_from_csv token = "0UrVehnBSM9c7DQZZCM2EtB4lpuEwbTiLue6rf0Vme3lnzswlMA9whjJbmhX" CSV_FILE = "test.csv" + def load_test_data(from_csv=True): if from_csv and os.path.exists(CSV_FILE): - with open(CSV_FILE, 'r') as f: + with open(CSV_FILE, "r") as f: reader = csv.DictReader(f) return list(reader) else: - return [{ - "CaltechAUTHORS_ID": "bwww3-z8y74", - "CaltechAUTHORS_DOI": f"10.1093/mnras/staa{random.randint(1000, 9999)}", - "Related_DOI": "10.22002/D1.1458", - "Data_ID": "3hqgp-jhw61", - "Cross_Link": "No", - "Test_ID": "99s7k-d6f58", - "resource_type": "publication-article" - }] + return [ + { + "CaltechAUTHORS_ID": "bwww3-z8y74", + "CaltechAUTHORS_DOI": f"10.1093/mnras/staa{random.randint(1000, 9999)}", + "Related_DOI": "10.22002/D1.1458", + "Data_ID": "3hqgp-jhw61", + "Cross_Link": "No", + "Test_ID": "99s7k-d6f58", + "resource_type": "publication-article", + } + ] + def verify_related_identifiers_on_site(data_rows, test=False): - base_url = "https://data.caltechlibrary.dev" if test else "https://data.caltechlibrary.caltech.edu" + base_url = ( + "https://data.caltechlibrary.dev" + if test + else "https://data.caltechlibrary.caltech.edu" + ) headers = {"Authorization": f"Bearer {token}"} results = [] for row in data_rows: - record_id = row['Test_ID'] - doi = row['CaltechAUTHORS_DOI'] - caltech_author_id = row['CaltechAUTHORS_ID'] + record_id = row["Test_ID"] + doi = row["CaltechAUTHORS_DOI"] + caltech_author_id = row["CaltechAUTHORS_ID"] author_url = f"https://authors.library.caltech.edu/records/{caltech_author_id}" r = requests.get(f"{base_url}/api/records/{record_id}", headers=headers) @@ -59,10 +67,13 @@ def verify_related_identifiers_on_site(data_rows, test=False): return results + class TestCaltechDataUploader(unittest.TestCase): def test_add_and_verify_related_identifiers(self): - test_data = load_test_data(from_csv=False) # <-- change this flag to toggle source + test_data = load_test_data( + from_csv=False + ) # <-- change this flag to toggle source upload_results = add_related_identifiers_from_csv(test_data, token, test=True) for record_id, success in upload_results: self.assertTrue(success, f"❌ Upload failed for record {record_id}") From abe8aaad54307cf1a76c5cc24e22c134ef13b43a Mon Sep 17 00:00:00 2001 From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com> Date: Mon, 19 May 2025 14:51:28 -0700 Subject: [PATCH 05/19] Update test_matchers.py 
--- tests/test_matchers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_matchers.py b/tests/test_matchers.py index bc6d7b24..305aa752 100644 --- a/tests/test_matchers.py +++ b/tests/test_matchers.py @@ -5,11 +5,12 @@ import requests import sys + sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) from ames.matchers.caltechauthors import add_related_identifiers_from_csv -token = "0UrVehnBSM9c7DQZZCM2EtB4lpuEwbTiLue6rf0Vme3lnzswlMA9whjJbmhX" +token = os.environ.get("RDMTOK") CSV_FILE = "test.csv" From b6af8ae111b6f0e6aeb3cf0ec664fd4d4326f6e6 Mon Sep 17 00:00:00 2001 From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com> Date: Mon, 19 May 2025 14:57:02 -0700 Subject: [PATCH 06/19] Update caltechauthors.py --- ames/matchers/caltechauthors.py | 85 +++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/ames/matchers/caltechauthors.py b/ames/matchers/caltechauthors.py index c41dbf9d..d1956bfd 100644 --- a/ames/matchers/caltechauthors.py +++ b/ames/matchers/caltechauthors.py @@ -520,3 +520,88 @@ def process_link_updates(input_csv): } ) return results + +def add_authors_affiliations(record, token, dimensions_key, allowed_identifiers=None): + # Add dimensions affiliations to a record + + record_id = record["id"] + if "doi" in record["pids"]: + doi = record["pids"]["doi"]["identifier"] + else: + doi = None + if "identifiers" in record["metadata"]: + for idv in record["metadata"]["identifiers"]: + if idv["scheme"] == "doi": + doi = idv["identifier"] + if doi: + endpoint = "https://cris-api.dimensions.ai/v3" + dimcli.login(key=dimensions_key, endpoint=endpoint, verbose=False) + dsl = dimcli.Dsl() + res = dsl.query_iterative( + f""" + search publications + where doi = "{doi}" + return publications[basics+extras+abstract] """, + verbose=False, + ) + publication = res.json["publications"] + update = False + if len(publication) == 1: + publication = publication[0] + dimensions_authors = publication.get("authors", []) + existing_authors = record["metadata"]["creators"] + if len(dimensions_authors) == len(existing_authors): + for position in range(len(dimensions_authors)): + author = existing_authors[position] + dimensions_author = dimensions_authors[position] + if "affiliations" not in author: + affiliations = [] + affiliation_ids = [] + if dimensions_author["affiliations"] not in [[], None]: + for affiliation in dimensions_author["affiliations"]: + affil = {} + if "id" in affiliation: + if affiliation["id"] is not None: + ror = grid_to_ror(affiliation["id"]) + if ror is not None: + if allowed_identifiers is not None: + if ror in allowed_identifiers: + affil["id"] = ror + else: + print( + "ROR %s not in allowed identifiers list" + % ror + ) + else: + print( + "Missing ROR for affiliation %s" + % affiliation["id"] + ) + # We have to manually handle incorrectly mapped JPL + # affiliations + if "raw_affiliation" in affiliation: + raw = affiliation["raw_affiliation"] + affil["name"] = raw + if "91109" in raw: + affil["id"] = "027k65916" + if "Jet Propulsion Laboratory" in raw: + affil["id"] = "027k65916" + if "JPL" in raw: + affil["id"] = "027k65916" + # Some dimensions records don't include id values. 
+ # We ignore those for now + if "id" in affil: + if affil["id"] not in affiliation_ids: + update = True + affiliation_ids.append(affil["id"]) + affiliations.append(affil) + existing_authors[position]["affiliations"] = affiliations + if update: + caltechdata_edit( + record_id, + metadata=record, + token=token, + production=True, + publish=True, + authors=True, + ) From d1afcdf481c153a67f89aabdddcc206dd31f1743 Mon Sep 17 00:00:00 2001 From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com> Date: Mon, 19 May 2025 15:08:39 -0700 Subject: [PATCH 07/19] Update test_matchers.py --- tests/test_matchers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_matchers.py b/tests/test_matchers.py index 305aa752..0373dad2 100644 --- a/tests/test_matchers.py +++ b/tests/test_matchers.py @@ -33,7 +33,7 @@ def load_test_data(from_csv=True): ] -def verify_related_identifiers_on_site(data_rows, test=False): +def verify_related_identifiers_on_site(data_rows, test=True): base_url = ( "https://data.caltechlibrary.dev" if test From 472967670d204370670407b8c37434a19d4139aa Mon Sep 17 00:00:00 2001 From: RohanBhattaraiNP Date: Wed, 4 Jun 2025 09:58:48 +0000 Subject: [PATCH 08/19] Add updated CITATION.cff from codemeta.json file --- CITATION.cff | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CITATION.cff b/CITATION.cff index 3381ac61..b8693bb4 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -21,4 +21,4 @@ keywords: - GitHub - metadata - software -date-released: 2025-05-19 +date-released: 2025-06-04 From c88bf89a40a6f04239f1d018e0ee624940535af5 Mon Sep 17 00:00:00 2001 From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com> Date: Wed, 4 Jun 2025 03:02:28 -0700 Subject: [PATCH 09/19] Update caltechauthors.py --- ames/matchers/caltechauthors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ames/matchers/caltechauthors.py b/ames/matchers/caltechauthors.py index d1956bfd..14cdc92c 100644 --- a/ames/matchers/caltechauthors.py +++ b/ames/matchers/caltechauthors.py @@ -4,7 +4,7 @@ from caltechdata_api import caltechdata_edit -# function to get metadata for a record +# function to get metadata for a records def get_record_metadata(record_id): metadata_url = f"https://authors.library.caltech.edu/api/records/{record_id}" headers = {} From 52f4c308709f43c79b692107ed9191037ebb0b1f Mon Sep 17 00:00:00 2001 From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com> Date: Wed, 4 Jun 2025 03:10:49 -0700 Subject: [PATCH 10/19] Update caltechauthors.py --- ames/matchers/caltechauthors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ames/matchers/caltechauthors.py b/ames/matchers/caltechauthors.py index 14cdc92c..d1956bfd 100644 --- a/ames/matchers/caltechauthors.py +++ b/ames/matchers/caltechauthors.py @@ -4,7 +4,7 @@ from caltechdata_api import caltechdata_edit -# function to get metadata for a records +# function to get metadata for a record def get_record_metadata(record_id): metadata_url = f"https://authors.library.caltech.edu/api/records/{record_id}" headers = {} From 0025215c9c4893ac3eb57d59783b82ae85526515 Mon Sep 17 00:00:00 2001 From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com> Date: Wed, 4 Jun 2025 03:42:45 -0700 Subject: [PATCH 11/19] Update test_matchers.py --- tests/test_matchers.py | 82 +++++++++++++++++++++++++++--------------- 1 file changed, 54 insertions(+), 28 deletions(-) diff --git a/tests/test_matchers.py b/tests/test_matchers.py index 
0373dad2..34bcbe18 100644 --- a/tests/test_matchers.py +++ b/tests/test_matchers.py @@ -1,10 +1,17 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Extended-logging version of tests/test_matchers.py +Adds prints so you can see where the flow dies. +""" + import os import unittest import csv import random import requests import sys - +from datetime import datetime sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) @@ -13,24 +20,33 @@ token = os.environ.get("RDMTOK") CSV_FILE = "test.csv" +print(f"[{datetime.now().isoformat()}] RDMTOK present? {'YES' if token else 'NO'}") + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- def load_test_data(from_csv=True): + print(f"[loader] from_csv={from_csv}") if from_csv and os.path.exists(CSV_FILE): - with open(CSV_FILE, "r") as f: + print(f"[loader] reading {CSV_FILE}") + with open(CSV_FILE, "r", newline="") as f: reader = csv.DictReader(f) - return list(reader) + rows = list(reader) + print(f"[loader] loaded {len(rows)} rows") + return rows else: - return [ - { - "CaltechAUTHORS_ID": "bwww3-z8y74", - "CaltechAUTHORS_DOI": f"10.1093/mnras/staa{random.randint(1000, 9999)}", - "Related_DOI": "10.22002/D1.1458", - "Data_ID": "3hqgp-jhw61", - "Cross_Link": "No", - "Test_ID": "99s7k-d6f58", - "resource_type": "publication-article", - } - ] + dummy = { + "CaltechAUTHORS_ID": "bwww3-z8y74", + "CaltechAUTHORS_DOI": f"10.1093/mnras/staa{random.randint(1000, 9999)}", + "Related_DOI": "10.22002/D1.1458", + "Data_ID": "3hqgp-jhw61", + "Cross_Link": "No", + "Test_ID": "99s7k-d6f58", + "resource_type": "publication-article", + } + print(f"[loader] generated 1 synthetic row -> DOI {dummy['CaltechAUTHORS_DOI']}") + return [dummy] def verify_related_identifiers_on_site(data_rows, test=True): @@ -48,14 +64,19 @@ def verify_related_identifiers_on_site(data_rows, test=True): caltech_author_id = row["CaltechAUTHORS_ID"] author_url = f"https://authors.library.caltech.edu/records/{caltech_author_id}" - r = requests.get(f"{base_url}/api/records/{record_id}", headers=headers) + url = f"{base_url}/api/records/{record_id}" + print(f"[verify] GET {url}") + r = requests.get(url, headers=headers) + print(f"[verify] status={r.status_code}") if r.status_code != 200: - print(f"❌ Could not fetch record {record_id}") + print(f"❌ Could not fetch record {record_id}: {r.text[:300]}") results.append((record_id, False)) continue metadata = r.json().get("metadata", {}) related = metadata.get("related_identifiers", []) + print(f"[verify] related_identifiers → {related}") + found_doi = any(x["identifier"] == doi for x in related) found_author = any(x["identifier"] == author_url for x in related) @@ -63,26 +84,31 @@ def verify_related_identifiers_on_site(data_rows, test=True): print(f"✅ Verified: {record_id}") results.append((record_id, True)) else: - print(f"❌ Verification failed: {record_id}") + print(f"❌ Verification failed: {record_id} " + f"(doi={found_doi}, author={found_author})") results.append((record_id, False)) return results +# --------------------------------------------------------------------------- +# Unit-test +# --------------------------------------------------------------------------- + class TestCaltechDataUploader(unittest.TestCase): def test_add_and_verify_related_identifiers(self): - test_data = load_test_data( - from_csv=False - ) # <-- change this flag to toggle source - upload_results = 
add_related_identifiers_from_csv(test_data, token, test=True) - for record_id, success in upload_results: - self.assertTrue(success, f"❌ Upload failed for record {record_id}") + test_data = load_test_data(from_csv=False) # flip flag to change source - verify_results = verify_related_identifiers_on_site(test_data, test=True) - for record_id, success in verify_results: - self.assertTrue(success, f"❌ Verification failed for record {record_id}") + print("[test] calling add_related_identifiers_from_csv ...") + upload_results = add_related_identifiers_from_csv( + test_data, token, test=True + ) + print(f"[test] upload_results → {upload_results}") + for record_id, success in upload_results: + print(f"[test] upload {record_id}: {'OK' if success else 'FAIL'}") + self.assertTrue(success, f"❌ Upload failed for record {record_id}") -if __name__ == "__main__": - unittest.main() + print("[test] verifying on server ...") + verify_results = verify_related_identifier_ From 1e3c2f1282d7590b529350032abe86bf0017cf08 Mon Sep 17 00:00:00 2001 From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com> Date: Wed, 4 Jun 2025 03:44:54 -0700 Subject: [PATCH 12/19] Update test_matchers.py --- tests/test_matchers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_matchers.py b/tests/test_matchers.py index 34bcbe18..a5751c52 100644 --- a/tests/test_matchers.py +++ b/tests/test_matchers.py @@ -20,7 +20,8 @@ token = os.environ.get("RDMTOK") CSV_FILE = "test.csv" -print(f"[{datetime.now().isoformat()}] RDMTOK present? {'YES' if token else 'NO'}") +print(f"[debug] RDMTOK present? {'YES' if token else 'NO'} " + f"(len={len(token) if token else 0})") # --------------------------------------------------------------------------- # Helpers From 3ad7bd27553a597811a8c8f411530820811e3118 Mon Sep 17 00:00:00 2001 From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com> Date: Wed, 4 Jun 2025 03:48:44 -0700 Subject: [PATCH 13/19] Update test.yml --- .github/workflows/test.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 867667a5..42ab51c0 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -30,5 +30,7 @@ jobs: pip install -r requirements.txt || true - name: Run tests for caltechauthors + env: + RDMTOK: ${{ secrets.RDMTOK }} run: | PYTHONPATH=${{ github.workspace }} python -m unittest discover -s tests -p 'test_matchers.py' From 9300b13c1f6049615e55f4ea806aa7d5f475daf7 Mon Sep 17 00:00:00 2001 From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com> Date: Wed, 4 Jun 2025 03:49:03 -0700 Subject: [PATCH 14/19] Update caltechauthors.py --- ames/matchers/caltechauthors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ames/matchers/caltechauthors.py b/ames/matchers/caltechauthors.py index d1956bfd..14cdc92c 100644 --- a/ames/matchers/caltechauthors.py +++ b/ames/matchers/caltechauthors.py @@ -4,7 +4,7 @@ from caltechdata_api import caltechdata_edit -# function to get metadata for a record +# function to get metadata for a records def get_record_metadata(record_id): metadata_url = f"https://authors.library.caltech.edu/api/records/{record_id}" headers = {} From 2f7d245c6c6181700bbcb1d3d142f9e9160e04a9 Mon Sep 17 00:00:00 2001 From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com> Date: Wed, 4 Jun 2025 03:53:01 -0700 Subject: [PATCH 15/19] Update test_matchers.py --- tests/test_matchers.py | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/tests/test_matchers.py b/tests/test_matchers.py index a5751c52..83752fe7 100644 --- a/tests/test_matchers.py +++ b/tests/test_matchers.py @@ -112,4 +112,4 @@ def test_add_and_verify_related_identifiers(self): self.assertTrue(success, f"❌ Upload failed for record {record_id}") print("[test] verifying on server ...") - verify_results = verify_related_identifier_ + verify_results = verify_related_identifiers_on_site(test_data, test=True) From d283a146c1de88af6235f33b04b4206668d9f88f Mon Sep 17 00:00:00 2001 From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com> Date: Wed, 4 Jun 2025 04:00:13 -0700 Subject: [PATCH 16/19] Update test_matchers.py --- tests/test_matchers.py | 120 +++++++++++++++++------------------------ 1 file changed, 49 insertions(+), 71 deletions(-) diff --git a/tests/test_matchers.py b/tests/test_matchers.py index 83752fe7..a04da0f9 100644 --- a/tests/test_matchers.py +++ b/tests/test_matchers.py @@ -1,115 +1,93 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -""" -Extended-logging version of tests/test_matchers.py -Adds prints so you can see where the flow dies. -""" - -import os -import unittest import csv +import os import random -import requests import sys -from datetime import datetime +import unittest -sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) +import requests -from ames.matchers.caltechauthors import add_related_identifiers_from_csv +# Ensure the local project package is importable when the repo root is the CWD. +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) +from ames.matchers.caltechauthors import add_related_identifiers_from_csv # noqa: E402 -token = os.environ.get("RDMTOK") +TOKEN = os.getenv("RDMTOK") CSV_FILE = "test.csv" -print(f"[debug] RDMTOK present? 
{'YES' if token else 'NO'} " - f"(len={len(token) if token else 0})") +print(f"[init] RDMTOK present: {'YES' if TOKEN else 'NO'} (len={len(TOKEN) if TOKEN else 0})") -# --------------------------------------------------------------------------- -# Helpers -# --------------------------------------------------------------------------- -def load_test_data(from_csv=True): - print(f"[loader] from_csv={from_csv}") +def load_test_data(from_csv: bool = True): + """Return rows for the upload function, from CSV when available.""" if from_csv and os.path.exists(CSV_FILE): - print(f"[loader] reading {CSV_FILE}") - with open(CSV_FILE, "r", newline="") as f: - reader = csv.DictReader(f) - rows = list(reader) - print(f"[loader] loaded {len(rows)} rows") - return rows - else: - dummy = { + with open(CSV_FILE, newline="") as fh: + return list(csv.DictReader(fh)) + + doi_stub = random.randint(1000, 9999) + return [ + { "CaltechAUTHORS_ID": "bwww3-z8y74", - "CaltechAUTHORS_DOI": f"10.1093/mnras/staa{random.randint(1000, 9999)}", + "CaltechAUTHORS_DOI": f"10.1093/mnras/staa{doi_stub}", "Related_DOI": "10.22002/D1.1458", "Data_ID": "3hqgp-jhw61", "Cross_Link": "No", "Test_ID": "99s7k-d6f58", "resource_type": "publication-article", } - print(f"[loader] generated 1 synthetic row -> DOI {dummy['CaltechAUTHORS_DOI']}") - return [dummy] + ] -def verify_related_identifiers_on_site(data_rows, test=True): - base_url = ( +def verify_related_identifiers_on_site(rows, *, test: bool = True): + """Fetch each record and report which links are present or missing.""" + base = ( "https://data.caltechlibrary.dev" if test else "https://data.caltechlibrary.caltech.edu" ) - headers = {"Authorization": f"Bearer {token}"} + headers = {"Authorization": f"Bearer {TOKEN}"} results = [] - for row in data_rows: + for row in rows: record_id = row["Test_ID"] doi = row["CaltechAUTHORS_DOI"] - caltech_author_id = row["CaltechAUTHORS_ID"] - author_url = f"https://authors.library.caltech.edu/records/{caltech_author_id}" - - url = f"{base_url}/api/records/{record_id}" - print(f"[verify] GET {url}") - r = requests.get(url, headers=headers) - print(f"[verify] status={r.status_code}") - if r.status_code != 200: - print(f"❌ Could not fetch record {record_id}: {r.text[:300]}") + author_link = f"https://authors.library.caltech.edu/records/{row['CaltechAUTHORS_ID']}" + + resp = requests.get(f"{base}/api/records/{record_id}", headers=headers) + print(f"[verify] {record_id}: {resp.status_code}") + if resp.status_code != 200: + print(" Error: could not fetch record from server.") results.append((record_id, False)) continue - metadata = r.json().get("metadata", {}) - related = metadata.get("related_identifiers", []) - print(f"[verify] related_identifiers → {related}") + related = resp.json().get("metadata", {}).get("related_identifiers", []) + has_doi = any(x["identifier"] == doi for x in related) + has_author = any(x["identifier"] == author_link for x in related) - found_doi = any(x["identifier"] == doi for x in related) - found_author = any(x["identifier"] == author_url for x in related) + status_parts = [ + "DOI link present" if has_doi else "DOI link missing", + "CaltechAUTHORS link present" if has_author else "CaltechAUTHORS link missing", + ] + print(" " + "; ".join(status_parts)) - if found_doi and found_author: - print(f"✅ Verified: {record_id}") - results.append((record_id, True)) - else: - print(f"❌ Verification failed: {record_id} " - f"(doi={found_doi}, author={found_author})") - results.append((record_id, False)) + results.append((record_id, 
has_doi and has_author)) return results -# --------------------------------------------------------------------------- -# Unit-test -# --------------------------------------------------------------------------- - class TestCaltechDataUploader(unittest.TestCase): - + @unittest.skipUnless(TOKEN, "needs RDMTOK to hit CaltechDATA API") def test_add_and_verify_related_identifiers(self): - test_data = load_test_data(from_csv=False) # flip flag to change source + rows = load_test_data(from_csv=False) + + uploads = add_related_identifiers_from_csv(rows, TOKEN, test=True) + for record_id, ok in uploads: + self.assertTrue(ok, f"upload failed for {record_id}") - print("[test] calling add_related_identifiers_from_csv ...") - upload_results = add_related_identifiers_from_csv( - test_data, token, test=True - ) - print(f"[test] upload_results → {upload_results}") + verifies = verify_related_identifiers_on_site(rows, test=True) + for record_id, ok in verifies: + self.assertTrue(ok, f"verification failed for {record_id}") - for record_id, success in upload_results: - print(f"[test] upload {record_id}: {'OK' if success else 'FAIL'}") - self.assertTrue(success, f"❌ Upload failed for record {record_id}") - print("[test] verifying on server ...") - verify_results = verify_related_identifiers_on_site(test_data, test=True) +if __name__ == "__main__": + unittest.main(verbosity=2) From 3c6d71739e133af880e9f3f1d278562878017a81 Mon Sep 17 00:00:00 2001 From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com> Date: Wed, 4 Jun 2025 09:01:54 -0700 Subject: [PATCH 17/19] Update caltechauthors.py --- ames/matchers/caltechauthors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ames/matchers/caltechauthors.py b/ames/matchers/caltechauthors.py index 14cdc92c..d1956bfd 100644 --- a/ames/matchers/caltechauthors.py +++ b/ames/matchers/caltechauthors.py @@ -4,7 +4,7 @@ from caltechdata_api import caltechdata_edit -# function to get metadata for a records +# function to get metadata for a record def get_record_metadata(record_id): metadata_url = f"https://authors.library.caltech.edu/api/records/{record_id}" headers = {} From ec967339515e7227453a3f7d6e0824e5291d9a22 Mon Sep 17 00:00:00 2001 From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com> Date: Wed, 4 Jun 2025 09:07:03 -0700 Subject: [PATCH 18/19] Update codemeta.json --- codemeta.json | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/codemeta.json b/codemeta.json index 639447c7..9acd3040 100755 --- a/codemeta.json +++ b/codemeta.json @@ -30,6 +30,17 @@ "email": "rsdoiel@caltech.edu", "@id": "https://orcid.org/0000-0003-0900-6903" }, + { + "@type": "Person", + "givenName": "Rohan", + "familyName": "Bhattarai", + "affiliation": { + "@type": "Organization", + "name": "Caltech" + }, + "email": "rbhattar@caltech.edu", + "@id": "https://orcid.org/0009-0007-0323-4733" + }, { "@type": "Person", "givenName": "Elizabeth", From 638d0aa47c96217c7eb9ae308f3545333ebddf7f Mon Sep 17 00:00:00 2001 From: RohanBhattaraiNP Date: Wed, 4 Jun 2025 16:07:21 +0000 Subject: [PATCH 19/19] Add updated CITATION.cff from codemeta.json file --- CITATION.cff | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CITATION.cff b/CITATION.cff index b8693bb4..ff07e63a 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -8,6 +8,9 @@ authors: - family-names: Doiel given-names: Robert orcid: https://orcid.org/0000-0003-0900-6903 + - family-names: Bhattarai + given-names: Rohan + orcid: https://orcid.org/0009-0007-0323-4733 - family-names: 
Won
    given-names: Elizabeth
    orcid: https://orcid.org/0009-0002-2450-6471