diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
new file mode 100644
index 00000000..42ab51c0
--- /dev/null
+++ b/.github/workflows/test.yml
@@ -0,0 +1,36 @@
+name: Test CaltechAuthors Matcher
+
+on:
+  push:
+    paths:
+      - 'ames/matchers/caltechauthors.py'
+      - 'tests/**'
+  pull_request:
+    paths:
+      - 'ames/matchers/caltechauthors.py'
+      - 'tests/**'
+  workflow_dispatch:
+
+jobs:
+  test-caltechauthors:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v3
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.10'
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          # tolerated so the job still runs when requirements.txt is absent
+          pip install -r requirements.txt || true
+
+      - name: Run tests for caltechauthors
+        env:
+          RDMTOK: ${{ secrets.RDMTOK }}
+        run: |
+          PYTHONPATH=${{ github.workspace }} python -m unittest discover -s tests -p 'test_matchers.py'
diff --git a/CITATION.cff b/CITATION.cff
index 3381ac61..ff07e63a 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -8,6 +8,9 @@ authors:
   - family-names: Doiel
     given-names: Robert
     orcid: https://orcid.org/0000-0003-0900-6903
+  - family-names: Bhattarai
+    given-names: Rohan
+    orcid: https://orcid.org/0009-0007-0323-4733
   - family-names: Won
     given-names: Elizabeth
     orcid: https://orcid.org/0009-0002-2450-6471
@@ -21,4 +24,4 @@ keywords:
   - GitHub
   - metadata
   - software
-date-released: 2025-05-19
+date-released: 2025-06-04
diff --git a/add_orcid_script.py b/add_orcid_script.py
index 92d0d6f4..0bd59160 100644
--- a/add_orcid_script.py
+++ b/add_orcid_script.py
@@ -1,9 +1,11 @@
-import csv,os
+import csv, os
 
-with open('orcids.csv', 'r') as f:
+with open("orcids.csv", "r") as f:
     reader = csv.reader(f)
     orcid_list = list(reader)
 for orcid_data in orcid_list:
     orcid = orcid_data[8]
     clpid = orcid_data[10]
-    os.system(f'python run_authors_name_update.py {clpid} {orcid} -add -new-scheme orcid')
+    os.system(
+        f"python run_authors_name_update.py {clpid} {orcid} -add -new-scheme orcid"
+    )
diff --git a/ames/harvesters/__init__.py b/ames/harvesters/__init__.py
index 1add4aea..5e290431 100644
--- a/ames/harvesters/__init__.py
+++ b/ames/harvesters/__init__.py
@@ -24,3 +24,4 @@ from .caltechauthors import is_file_present
 from .caltechauthors import get_series_records
 from .caltechauthors import generate_data_citation_csv
+from .caltechauthors import get_data_availability_links
diff --git a/ames/harvesters/caltechauthors.py b/ames/harvesters/caltechauthors.py
index 0695784c..454801d2 100644
--- a/ames/harvesters/caltechauthors.py
+++ b/ames/harvesters/caltechauthors.py
@@ -205,9 +205,7 @@ def get_author_records(
     query = f'?q=metadata.creators.person_or_org.identifiers.identifier%3A"{author_identifier}"'
 
     if date:
-        query += (
-            f"%20AND%20metadata.publication_date%3A%5B{date}%20TO%20%2A%20%5D"
-        )
+        query += f"%20AND%20metadata.publication_date%3A%5B{date}%20TO%20%2A%20%5D"
 
     if token:
         headers = {
@@ -482,3 +480,61 @@ def generate_data_citation_csv():
     )
 
     print(f"Saved {len(all_citations)} citations to {output_file}")
+
+
+def get_data_availability_links(token=None, size=25):
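+    """Harvest records whose additional descriptions are typed
+    'data-availability', classify each https link found in those
+    descriptions, and return one row dict per link for csv.DictWriter."""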
"application/json", + } + + response = requests.get(base_url, headers=headers) + if response.status_code != 200: + print( + f"Error: Unable to fetch records from the API. Status code: {response.status_code}" + ) + exit() + + records = response.json().get("hits", {}).get("hits", []) + + if not records: + print("No records found.") + exit() + + results = [] + for record in records: + record_id = record.get("id") + links = record.get("metadata", {}).get("additional_descriptions", []) + + for link_data in links: + description = link_data.get("description", "") + links_in_description = extract_https_links(description) + for link in links_in_description: + classification = classify_link(link) + cleaned = clean_link(link) + filename = extract_filename_from_link(link) + file_present = is_file_present(record_id, filename) + + results.append( + { + "record_id": record_id, + "original_link": link, + "classification": classification, + "cleaned_link": cleaned, + "filename": filename, + "file_present": file_present, + } + ) + + return results diff --git a/ames/matchers/__init__.py b/ames/matchers/__init__.py index a5923f56..69fb09bc 100644 --- a/ames/matchers/__init__.py +++ b/ames/matchers/__init__.py @@ -24,3 +24,4 @@ from .caltechauthors import save_metadata_to_file from .caltechauthors import add_related_identifiers_from_csv from .caltechauthors import add_authors_affiliations +from .caltechauthors import process_link_updates diff --git a/ames/matchers/caltechauthors.py b/ames/matchers/caltechauthors.py index 29fd02af..d1956bfd 100644 --- a/ames/matchers/caltechauthors.py +++ b/ames/matchers/caltechauthors.py @@ -342,7 +342,7 @@ def move_doi(record, token, test=False): ) -def add_related_identifiers_from_csv(csv_path, test=False): +def add_related_identifiers_from_csv(data_rows, token, test=False): """Reads a CSV file and adds related identifiers to each record using the CaltechDATA API.""" base_url = ( @@ -354,108 +354,173 @@ def add_related_identifiers_from_csv(csv_path, test=False): "Authorization": f"Bearer {token}", "Content-type": "application/json", } + results = [] + for row in data_rows: + record_id = row["Test_ID"] + doi = row["CaltechAUTHORS_DOI"] + caltech_author_id = row["CaltechAUTHORS_ID"] + resource_type = row["resource_type"] - with open(csv_path, "r") as csvfile: - reader = csv.DictReader(csvfile) - for row in reader: - record_id = row["Test_ID"] - doi = row["CaltechAUTHORS_DOI"] - caltech_author_id = row["CaltechAUTHORS_ID"] - resource_type = row["resource_type"] + print( + f"\nProcessing Test_ID: {record_id} with DOI: {doi} and CaltechAUTHORS_ID: {caltech_author_id}" + ) + print(f"Using resource_type: {resource_type}") - print( - f"\nProcessing Test_ID: {record_id} with DOI: {doi} and CaltechAUTHORS_ID: {caltech_author_id}" - ) - print(f"Using resource_type: {resource_type}") + # Fetch the current record + response = requests.get(f"{base_url}/api/records/{record_id}", headers=headers) + if response.status_code != 200: + print(f"Error fetching record {record_id}: {response.status_code}") + continue + record_data = response.json() - # Fetch the current record - response = requests.get( - f"{base_url}/api/records/{record_id}", headers=headers + # Draft check or create + draft_response = requests.get( + f"{base_url}/api/records/{record_id}/draft", headers=headers + ) + if draft_response.status_code == 200: + record_data = draft_response.json() + else: + draft_create_response = requests.post( + f"{base_url}/api/records/{record_id}/draft", headers=headers ) - if response.status_code 
+    results = []
+    for row in data_rows:
+        record_id = row["Test_ID"]
+        doi = row["CaltechAUTHORS_DOI"]
+        caltech_author_id = row["CaltechAUTHORS_ID"]
+        resource_type = row["resource_type"]
 
-    with open(csv_path, "r") as csvfile:
-        reader = csv.DictReader(csvfile)
-        for row in reader:
-            record_id = row["Test_ID"]
-            doi = row["CaltechAUTHORS_DOI"]
-            caltech_author_id = row["CaltechAUTHORS_ID"]
-            resource_type = row["resource_type"]
+        print(
+            f"\nProcessing Test_ID: {record_id} with DOI: {doi} and CaltechAUTHORS_ID: {caltech_author_id}"
+        )
+        print(f"Using resource_type: {resource_type}")
 
-            print(
-                f"\nProcessing Test_ID: {record_id} with DOI: {doi} and CaltechAUTHORS_ID: {caltech_author_id}"
-            )
-            print(f"Using resource_type: {resource_type}")
+        # Fetch the current record
+        response = requests.get(f"{base_url}/api/records/{record_id}", headers=headers)
+        if response.status_code != 200:
+            print(f"Error fetching record {record_id}: {response.status_code}")
+            results.append((record_id, False))
+            continue
+        record_data = response.json()
 
-            # Fetch the current record
-            response = requests.get(
-                f"{base_url}/api/records/{record_id}", headers=headers
-            )
-            if response.status_code != 200:
-                print(f"Error fetching record {record_id}: {response.status_code}")
-                continue
-            record_data = response.json()
+        # Draft check or create
+        draft_response = requests.get(
+            f"{base_url}/api/records/{record_id}/draft", headers=headers
+        )
+        if draft_response.status_code == 200:
+            record_data = draft_response.json()
+        else:
+            draft_create_response = requests.post(
+                f"{base_url}/api/records/{record_id}/draft", headers=headers
+            )
+            if draft_create_response.status_code != 201:
+                print(f"Error creating draft: {draft_create_response.status_code}")
+                results.append((record_id, False))
+                continue
+            record_data = draft_create_response.json()
 
-            # Draft check or create
-            draft_response = requests.get(
-                f"{base_url}/api/records/{record_id}/draft", headers=headers
-            )
-            if draft_response.status_code == 200:
-                record_data = draft_response.json()
-            else:
-                draft_create_response = requests.post(
-                    f"{base_url}/api/records/{record_id}/draft", headers=headers
-                )
-                if draft_create_response.status_code != 201:
-                    print(f"Error creating draft: {draft_create_response.status_code}")
-                    continue
-                record_data = draft_create_response.json()
+        related_identifiers = (
+            record_data.get("metadata", {}).get("related_identifiers", []) or []
+        )
 
-            related_identifiers = (
-                record_data.get("metadata", {}).get("related_identifiers", []) or []
-            )
+        doi_exists = any(ri.get("identifier") == doi for ri in related_identifiers)
+        author_url = f"https://authors.library.caltech.edu/records/{caltech_author_id}"
+        author_url_exists = any(
+            ri.get("identifier") == author_url for ri in related_identifiers
+        )
 
-            doi_exists = any(ri.get("identifier") == doi for ri in related_identifiers)
-            author_url = (
-                f"https://authors.library.caltech.edu/records/{caltech_author_id}"
-            )
-            author_url_exists = any(
-                ri.get("identifier") == author_url for ri in related_identifiers
-            )
+        if not doi_exists:
+            related_identifiers.append(
+                {
+                    "relation_type": {"id": "issupplementedby"},
+                    "identifier": doi,
+                    "scheme": "doi",
+                    "resource_type": {"id": resource_type},
+                }
+            )
+            print(f"Adding DOI: {doi}")
+        else:
+            print("DOI already exists")
+
+        if not author_url_exists:
+            related_identifiers.append(
+                {
+                    "relation_type": {"id": "isreferencedby"},
+                    "identifier": author_url,
+                    "scheme": "url",
+                    "resource_type": {"id": resource_type},
+                }
+            )
+            print(f"Adding CaltechAUTHORS link: {author_url}")
+        else:
+            print("CaltechAUTHORS link already exists")
 
-            if not doi_exists:
-                related_identifiers.append(
-                    {
-                        "relation_type": {"id": "issupplementedby"},
-                        "identifier": doi,
-                        "scheme": "doi",
-                        "resource_type": {"id": resource_type},
-                    }
-                )
-                print(f"Adding DOI: {doi}")
-            else:
-                print(f"DOI already exists")
-
-            if not author_url_exists:
-                related_identifiers.append(
-                    {
-                        "relation_type": {"id": "isreferencedby"},
-                        "identifier": author_url,
-                        "scheme": "url",
-                        "resource_type": {"id": resource_type},
-                    }
-                )
-                print(f"Adding CaltechAUTHORS link: {author_url}")
-            else:
-                print(f"CaltechAUTHORS link already exists")
+        record_data["metadata"]["related_identifiers"] = related_identifiers
 
-            record_data["metadata"]["related_identifiers"] = related_identifiers
+        update_response = requests.put(
+            f"{base_url}/api/records/{record_id}/draft",
+            headers=headers,
+            json=record_data,
+        )
+        if update_response.status_code != 200:
+            print(f"Error updating draft: {update_response.status_code}")
+            results.append((record_id, False))
+            continue
 
-            update_response = requests.put(
-                f"{base_url}/api/records/{record_id}/draft",
-                headers=headers,
-                json=record_data,
-            )
-            if update_response.status_code != 200:
-                print(f"Error updating draft: {update_response.status_code}")
-                continue
+        publish_response = requests.post(
+            f"{base_url}/api/records/{record_id}/draft/actions/publish", headers=headers
+        )
+        if publish_response.status_code != 202:
+            print(
+                f"Error publishing record {record_id}: {publish_response.status_code}"
+            )
+            results.append((record_id, False))
+            continue
 
-            publish_response = requests.post(
-                f"{base_url}/api/records/{record_id}/draft/actions/publish",
-                headers=headers,
-            )
-            if publish_response.status_code != 202:
-                print(
-                    f"Error publishing record {record_id}: {publish_response.status_code}"
-                )
-                continue
+        print(f"Successfully updated and published {record_id}")
+        results.append((record_id, True))
+    return results
 
-            print(f"Successfully updated and published {record_id}")
-    print("All records processed.")
+
+def process_link_updates(input_csv):
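+    """Read link classifications from input_csv, check any DOI links,
+    fold the links into each record's related identifiers, and return
+    one result row per record for the results CSV."""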
+    # read the CSV file and build a dictionary:
+    # record_id -> {"links": [(link, classification), ...]}
+    records_data = {}
+    with open(input_csv, newline="") as f:
+        reader = csv.DictReader(f, delimiter=",")
+        for row in reader:
+            record_id = row["record_id"].strip()
+            link = row["link"].strip()
+            classification = row["classification"].strip()
+
+            if record_id not in records_data:
+                records_data[record_id] = {"links": []}
+            records_data[record_id]["links"].append((link, classification))
+
+    results = []
+
+    for record_id, record_info in records_data.items():
+        print(f"Processing record {record_id}")
+
+        # get metadata for the record
+        metadata = get_record_metadata(record_id)
+        if not metadata:
+            # if we failed to get metadata, record the error and continue
+            first_link = record_info["links"][0][0] if record_info["links"] else ""
+            results.append(
+                {
+                    "record_id": record_id,
+                    "link": first_link,
+                    "doi_check": None,
+                    "metadata_updated": False,
+                    "notes": "Failed to retrieve metadata",
+                }
+            )
+            continue
+
+        # check existing related identifiers in the record
+        related_identifiers = metadata.get("metadata", {}).get(
+            "related_identifiers", []
+        )
+
+        # run check_doi if a "doi" is present among the links
+        doi_check = None
+        for lk, ctype in record_info["links"]:
+            if ctype.lower() == "doi":
+                try:
+                    doi_check = check_doi(lk, production=True)
+                except Exception as e:
+                    doi_check = f"Error: {str(e)}"
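+        # (if several DOI-classified links are listed, doi_check keeps
+        # the outcome for the last one)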
found") exit() print(f"Requesting accessions") - file_obj.writerow(["title","identifier","accession_date","agent"] + block_fields()) + file_obj.writerow( + ["title", "identifier", "accession_date", "agent"] + block_fields() + ) for acc in repo.accessions: for uri in acc.subjects: if search_uri == uri.ref: diff --git a/run_authors_affiliation_enhancement.py b/run_authors_affiliation_enhancement.py index 9789bdfd..663d4ba6 100644 --- a/run_authors_affiliation_enhancement.py +++ b/run_authors_affiliation_enhancement.py @@ -23,13 +23,13 @@ args = parser.parse_args() author_identifier = args.author_identifier -#to_update = [get_metadata('6dmax-vx632',authors=True)] +# to_update = [get_metadata('6dmax-vx632',authors=True)] to_update = get_author_records(author_identifier, token, all_metadata=True) for record in to_update: add_authors_affiliations( - record, - token, - dimensions_key, - allowed_identifiers=ror, - ) + record, + token, + dimensions_key, + allowed_identifiers=ror, + ) diff --git a/run_authors_group_report.py b/run_authors_group_report.py index c91e1aa5..529e1b49 100644 --- a/run_authors_group_report.py +++ b/run_authors_group_report.py @@ -3,15 +3,15 @@ group_identifier = sys.argv[1] -#outfile = open(f"{group_identifier}_report.csv", "w") -#writer = csv.writer(outfile) +# outfile = open(f"{group_identifier}_report.csv", "w") +# writer = csv.writer(outfile) to_update = get_group_records(group_identifier) outfile = open(f"{group_identifier}_report.json", "w") outfile.write(json.dumps(to_update, indent=4)) -#for record in to_update: +# for record in to_update: # if "doi" not in record["pids"]: # metadata = record["metadata"] # publisher = "" diff --git a/run_authors_name_update.py b/run_authors_name_update.py index b1228f36..d1a6414b 100644 --- a/run_authors_name_update.py +++ b/run_authors_name_update.py @@ -20,7 +20,7 @@ old_identifier = args.old_identifier new_identifier = args.new_identifier -to_update = get_author_records(old_identifier,token) +to_update = get_author_records(old_identifier, token) for record in to_update: if args.add: edit_author_identifier( diff --git a/run_caltechauthors_get_links.py b/run_caltechauthors_get_links.py new file mode 100644 index 00000000..cd486026 --- /dev/null +++ b/run_caltechauthors_get_links.py @@ -0,0 +1,16 @@ +from ames.harvesters.caltechauthors import get_data_availability_links +import csv +import os + +output_file = "test_results_get_links.csv" +token = os.environ.get("RDMTOK") +results = get_data_availability_links(token=token) + +if results: + with open(output_file, "w", newline="") as f: + writer = csv.DictWriter(f, fieldnames=results[0].keys()) + writer.writeheader() + writer.writerows(results) + print(f"Saved {len(results)} links to {output_file}") +else: + print("No results.") diff --git a/run_caltechauthors_harvestors.py b/run_caltechauthors_harvestors.py index dc217f52..3e43d1a5 100644 --- a/run_caltechauthors_harvestors.py +++ b/run_caltechauthors_harvestors.py @@ -6,11 +6,13 @@ extract_https_links, clean_link, extract_filename_from_link, - is_file_present + is_file_present, ) base_url = "https://authors.library.caltech.edu/api/records?q=metadata.additional_descriptions.type.id%3A%22data-availability%22&size=25&sort=bestmatch" -base_file_url_template = "https://authors.library.caltech.edu/api/records/{record_id}/files" +base_file_url_template = ( + "https://authors.library.caltech.edu/api/records/{record_id}/files" +) token = os.environ.get("RDMTOK") @@ -25,7 +27,9 @@ response = requests.get(base_url, headers=headers) if 
+results = get_data_availability_links(token=token)
+
+if results:
+    with open(output_file, "w", newline="") as f:
+        writer = csv.DictWriter(f, fieldnames=results[0].keys())
+        writer.writeheader()
+        writer.writerows(results)
+    print(f"Saved {len(results)} links to {output_file}")
+else:
+    print("No results.")
diff --git a/run_caltechauthors_harvestors.py b/run_caltechauthors_harvestors.py
index dc217f52..3e43d1a5 100644
--- a/run_caltechauthors_harvestors.py
+++ b/run_caltechauthors_harvestors.py
@@ -6,11 +6,13 @@
     extract_https_links,
     clean_link,
     extract_filename_from_link,
-    is_file_present
+    is_file_present,
 )
 
 base_url = "https://authors.library.caltech.edu/api/records?q=metadata.additional_descriptions.type.id%3A%22data-availability%22&size=25&sort=bestmatch"
-base_file_url_template = "https://authors.library.caltech.edu/api/records/{record_id}/files"
+base_file_url_template = (
+    "https://authors.library.caltech.edu/api/records/{record_id}/files"
+)
 
 token = os.environ.get("RDMTOK")
 
@@ -25,7 +27,9 @@
 response = requests.get(base_url, headers=headers)
 if response.status_code != 200:
-    print(f"Error: Unable to fetch records from the API. Status code: {response.status_code}")
+    print(
+        f"Error: Unable to fetch records from the API. Status code: {response.status_code}"
+    )
     exit()
 
 records = response.json().get("hits", {}).get("hits", [])
@@ -48,14 +52,16 @@
             filename = extract_filename_from_link(link)
             file_present = is_file_present(record_id, filename)
 
-            results.append({
-                "record_id": record_id,
-                "original_link": link,
-                "classification": classification,
-                "cleaned_link": cleaned,
-                "filename": filename,
-                "file_present": file_present
-            })
+            results.append(
+                {
+                    "record_id": record_id,
+                    "original_link": link,
+                    "classification": classification,
+                    "cleaned_link": cleaned,
+                    "filename": filename,
+                    "file_present": file_present,
+                }
+            )
 
 if results:
     with open(output_file, "w", newline="") as f:
diff --git a/run_caltechauthors_matchers.py b/run_caltechauthors_matchers.py
index 3d71948c..a5eb361a 100644
--- a/run_caltechauthors_matchers.py
+++ b/run_caltechauthors_matchers.py
@@ -25,9 +25,7 @@ def main():
             classification = row["classification"].strip()
 
             if record_id not in records_data:
-                records_data[record_id] = {
-                    "links": []
-                }
+                records_data[record_id] = {"links": []}
             records_data[record_id]["links"].append((link, classification))
 
     results = []
@@ -52,11 +50,13 @@ def main():
             continue
 
         # check existing related identifiers in the record
-        related_identifiers = metadata.get("metadata", {}).get("related_identifiers", [])
+        related_identifiers = metadata.get("metadata", {}).get(
+            "related_identifiers", []
+        )
 
         # run check_doi if a "doi" is present among the links
         doi_check = None
-        for (lk, ctype) in record_info["links"]:
+        for lk, ctype in record_info["links"]:
             if ctype.lower() == "doi":
                 try:
                     doi_check = check_doi(lk, production=True)
@@ -65,7 +65,7 @@ def main():
 
         # update related identifiers
         updated_metadata, updated_flag = update_related_identifiers(
-                metadata, record_info["links"], source_type="data"
+            metadata, record_info["links"], source_type="data"
         )
         if updated_flag:
             # saving to local JSON file for reference
@@ -84,5 +84,6 @@ def main():
             }
         )
 
+
 if __name__ == "__main__":
     main()
diff --git a/run_caltechauthors_update_links.py b/run_caltechauthors_update_links.py
new file mode 100644
index 00000000..7f58fc48
--- /dev/null
+++ b/run_caltechauthors_update_links.py
@@ -0,0 +1,16 @@
+from ames.matchers.caltechauthors import process_link_updates
+import csv
+
+input_file = "non_publisher_links.csv"
+output_file = "test_results_update_links.csv"
+
+results = process_link_updates(input_file)
+
+if results:
+    with open(output_file, "w", newline="") as f:
+        writer = csv.DictWriter(f, fieldnames=results[0].keys())
+        writer.writeheader()
+        writer.writerows(results)
+    print(f"Saved update results to {output_file}")
+else:
+    print("No results.")
diff --git a/run_harvest_links.py b/run_harvest_links.py
new file mode 100644
index 00000000..a5eb361a
--- /dev/null
+++ b/run_harvest_links.py
@@ -0,0 +1,89 @@
+import csv
+
+from ames.matchers.caltechauthors import (
+    get_record_metadata,
+    update_related_identifiers,
+    save_metadata_to_file,
+    check_doi,
+)
+
+
+def main():
+    input_file = "non_publisher_links.csv"
+    output_file = "test_results_matchers.csv"
+
+    # read the CSV file and build a dictionary:
+    # record_id -> {"links": [(link, classification), ...]}
+    records_data = {}
+    with open(input_file, newline="") as f:
+        reader = csv.DictReader(f, delimiter=",")
+        for row in reader:
row["record_id"].strip() + link = row["link"].strip() + classification = row["classification"].strip() + + if record_id not in records_data: + records_data[record_id] = {"links": []} + records_data[record_id]["links"].append((link, classification)) + + results = [] + + for record_id, record_info in records_data.items(): + print(f"Processing record {record_id}") + + # get metadata for the record + metadata = get_record_metadata(record_id) + if not metadata: + # if we failed to get metadata, record the error and continue + first_link = record_info["links"][0][0] if record_info["links"] else "" + results.append( + { + "record_id": record_id, + "link": first_link, + "doi_check": None, + "metadata_updated": False, + "notes": "Failed to retrieve metadata", + } + ) + continue + + # check existing related identifiers in the record + related_identifiers = metadata.get("metadata", {}).get( + "related_identifiers", [] + ) + + # run check_doi if a "doi" is present among the links + doi_check = None + for lk, ctype in record_info["links"]: + if ctype.lower() == "doi": + try: + doi_check = check_doi(lk, production=True) + except Exception as e: + doi_check = f"Error: {str(e)}" + + # update related identifiers + updated_metadata, updated_flag = update_related_identifiers( + metadata, record_info["links"], source_type="data" + ) + if updated_flag: + # saving to local JSON file for reference + save_metadata_to_file(updated_metadata, record_id) + pass + + # preparing the final row for the results CSV + first_link = record_info["links"][0][0] if record_info["links"] else "" + results.append( + { + "record_id": record_id, + "link": first_link, + "doi_check": doi_check, + "metadata_updated": updated_flag, + "notes": "", + } + ) + + +if __name__ == "__main__": + main() diff --git a/tests/test_matchers.py b/tests/test_matchers.py new file mode 100644 index 00000000..a04da0f9 --- /dev/null +++ b/tests/test_matchers.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +import csv +import os +import random +import sys +import unittest + +import requests + +# Ensure the local project package is importable when the repo root is the CWD. 
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
+from ames.matchers.caltechauthors import add_related_identifiers_from_csv  # noqa: E402
+
+TOKEN = os.getenv("RDMTOK")
+CSV_FILE = "test.csv"
+
+print(
+    f"[init] RDMTOK present: {'YES' if TOKEN else 'NO'} "
+    f"(len={len(TOKEN) if TOKEN else 0})"
+)
+
+
+def load_test_data(from_csv: bool = True):
+    """Return rows for the upload function, from CSV when available."""
+    if from_csv and os.path.exists(CSV_FILE):
+        with open(CSV_FILE, newline="") as fh:
+            return list(csv.DictReader(fh))
+
+    doi_stub = random.randint(1000, 9999)
+    return [
+        {
+            "CaltechAUTHORS_ID": "bwww3-z8y74",
+            "CaltechAUTHORS_DOI": f"10.1093/mnras/staa{doi_stub}",
+            "Related_DOI": "10.22002/D1.1458",
+            "Data_ID": "3hqgp-jhw61",
+            "Cross_Link": "No",
+            "Test_ID": "99s7k-d6f58",
+            "resource_type": "publication-article",
+        }
+    ]
+
+
+def verify_related_identifiers_on_site(rows, *, test: bool = True):
+    """Fetch each record and report which links are present or missing."""
+    base = (
+        "https://data.caltechlibrary.dev"
+        if test
+        else "https://data.caltechlibrary.caltech.edu"
+    )
+    headers = {"Authorization": f"Bearer {TOKEN}"}
+    results = []
+
+    for row in rows:
+        record_id = row["Test_ID"]
+        doi = row["CaltechAUTHORS_DOI"]
+        author_link = (
+            f"https://authors.library.caltech.edu/records/{row['CaltechAUTHORS_ID']}"
+        )
+
+        resp = requests.get(f"{base}/api/records/{record_id}", headers=headers)
+        print(f"[verify] {record_id}: {resp.status_code}")
+        if resp.status_code != 200:
+            print("  Error: could not fetch record from server.")
+            results.append((record_id, False))
+            continue
+
+        related = resp.json().get("metadata", {}).get("related_identifiers", [])
+        has_doi = any(x["identifier"] == doi for x in related)
+        has_author = any(x["identifier"] == author_link for x in related)
+
+        status_parts = [
+            "DOI link present" if has_doi else "DOI link missing",
+            "CaltechAUTHORS link present"
+            if has_author
+            else "CaltechAUTHORS link missing",
+        ]
+        print("  " + "; ".join(status_parts))
+
+        results.append((record_id, has_doi and has_author))
+
+    return results
+
+
+class TestCaltechDataUploader(unittest.TestCase):
+    @unittest.skipUnless(TOKEN, "needs RDMTOK to hit CaltechDATA API")
+    def test_add_and_verify_related_identifiers(self):
+        rows = load_test_data(from_csv=False)
+
+        uploads = add_related_identifiers_from_csv(rows, TOKEN, test=True)
+        for record_id, ok in uploads:
+            self.assertTrue(ok, f"upload failed for {record_id}")
+
+        verifies = verify_related_identifiers_on_site(rows, test=True)
+        for record_id, ok in verifies:
+            self.assertTrue(ok, f"verification failed for {record_id}")
+
+
+if __name__ == "__main__":
+    unittest.main(verbosity=2)
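+
+# Local usage (assumes RDMTOK is exported in the shell):
+#   python -m unittest tests.test_matchers -v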