Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions easyDataverse/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,7 @@ def update(self):

update_dataset(
to_change=self._extract_changes(),
license=self.license,
p_id=self.p_id, # type: ignore
files=self.files,
DATAVERSE_URL=str(self.DATAVERSE_URL), # type: ignore
Expand Down
52 changes: 51 additions & 1 deletion easyDataverse/license.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Optional
from typing import List, Optional
from urllib import parse
from pydantic import BaseModel, ConfigDict, Field
import httpx
Expand Down Expand Up @@ -70,6 +70,28 @@ def fetch_by_name(cls, name: str, server_url: str) -> "License":
except StopIteration:
raise Exception(f"License '{name}' not found at '{server_url}'")

def to_json_ld(self):
    """
    Serialize this license as a JSON-LD fragment.

    Returns:
        dict: A single-entry dictionary whose ``schema:license`` key holds
            the value of ``self.uri``.
    """
    json_ld = {}
    json_ld["schema:license"] = self.uri
    return json_ld

@staticmethod
def json_ld_field_names() -> List[str]:
"""
Get the JSON-LD field names for the license.

Returns:
List[str]: A list of JSON-LD field names for the license.
"""
return ["schema:license"]


class CustomLicense(BaseModel):
"""
Expand Down Expand Up @@ -130,3 +152,31 @@ class CustomLicense(BaseModel):
description="Disclaimer for the dataset.",
alias="disclaimer",
)

def to_json_ld(self):
    """Serialize the custom license as a JSON-LD fragment.

    The model is dumped in JSON mode by alias with ``None`` values left out,
    and every resulting key is prefixed with ``dvcore:``.

    Returns:
        dict: The dumped fields keyed as ``dvcore:<alias>``.
    """
    dumped = self.model_dump(
        by_alias=True,
        exclude_none=True,
        mode="json",
    )

    json_ld = {}
    for alias, value in dumped.items():
        json_ld[f"dvcore:{alias}"] = value
    return json_ld

@staticmethod
def json_ld_field_names() -> List[str]:
    """
    Get the JSON-LD field names for the custom license.

    Each name is the field's alias prefixed with ``dvcore:``. A field that
    declares no alias falls back to its attribute name, so the result never
    contains ``dvcore:None``.

    Returns:
        List[str]: A list of JSON-LD field names for the custom license.
    """
    return [
        # ``alias`` is None for fields declared without one; fall back to the
        # attribute name in that case.
        f"dvcore:{field.alias or name}"
        for name, field in CustomLicense.model_fields.items()
    ]
128 changes: 127 additions & 1 deletion easyDataverse/uploader.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@

from rich.panel import Panel
from rich.console import Console
from typing import Dict, List, Optional
from typing import Dict, List, Optional, Union
from dvuploader import File, DVUploader

from pyDataverse.api import NativeApi, DataAccessApi
from pyDataverse.models import Dataset

from easyDataverse.license import CustomLicense, License


def upload_to_dataverse(
json_data: str,
Expand Down Expand Up @@ -118,6 +120,7 @@ def update_dataset(
files: List[File],
DATAVERSE_URL: Optional[str] = None,
API_TOKEN: Optional[str] = None,
license: Optional[Union[CustomLicense, License]] = None,
) -> bool:
"""Uploads and updates the metadata of a draft dataset.

Expand All @@ -141,6 +144,14 @@ def update_dataset(
api_token=API_TOKEN, # type: ignore
)

if license is not None:
_update_license(
p_id=p_id,
license=license,
base_url=DATAVERSE_URL, # type: ignore
api_token=API_TOKEN, # type: ignore
)

_uploadFiles(
files=files,
p_id=p_id,
Expand Down Expand Up @@ -173,3 +184,118 @@ def _update_metadata(

response = httpx.put(EDIT_ENDPOINT, headers=headers, json=to_change)
response.raise_for_status()


def _update_license(
    p_id: str,
    license: Union[CustomLicense, License],
    base_url: str,
    api_token: str,
):
    """Updates the license of a dataset.

    Fetches the dataset's current JSON-LD metadata, removes the JSON-LD
    fields of the license kind that is not being set, merges in the fields
    produced by ``license.to_json_ld()`` and sends the result back to the
    server.

    Args:
        p_id (str): Persistent ID of the dataset.
        license (Union[CustomLicense, License]): License object to update.
        base_url (str): URL of the dataverse instance.
        api_token (str): API token of the user.

    Raises:
        AssertionError: If license is not a License or CustomLicense instance.
        Exception: If fetching or updating the JSON-LD metadata fails.
    """
    # Raised explicitly instead of through an ``assert`` statement so the
    # check still runs when Python is started with ``-O``.
    if not isinstance(license, (License, CustomLicense)):
        raise AssertionError("License must be a License or CustomLicense")

    headers = {
        "X-Dataverse-key": api_token,
        "Accept": "application/ld+json",
        "Content-Type": "application/ld+json",
    }

    # First, fetch the JSON-LD metadata
    data = _fetch_json_ld_metadata(
        p_id=p_id,
        base_url=base_url,
        headers=headers,
    )

    # Drop the fields of the *other* license kind, then merge in the new ones.
    other_kind = License if isinstance(license, CustomLicense) else CustomLicense
    for field in other_kind.json_ld_field_names():
        data.pop(field, None)
    data.update(license.to_json_ld())

    # Then, update the JSON-LD metadata on the server
    _update_json_ld_metadata(
        p_id=p_id,
        data=data,
        base_url=base_url,
        headers=headers,
    )


def _fetch_json_ld_metadata(
    p_id: str,
    base_url: str,
    headers: Dict[str, str],
):
    """Fetches JSON-LD metadata for a dataset.

    Args:
        p_id (str): Persistent ID of the dataset.
        base_url (str): URL of the dataverse instance.
        headers (Dict[str, str]): HTTP headers including API token.

    Returns:
        Dict: The value stored under the ``data`` key of the JSON response.

    Raises:
        httpx.HTTPError: If the request fails.
        AssertionError: If the response doesn't contain expected data structure.
    """
    response = httpx.get(
        f"{base_url.rstrip('/')}/api/datasets/:persistentId/metadata",
        # Sent through ``params`` so httpx URL-encodes the identifier instead
        # of it being pasted verbatim into the query string.
        params={"persistentId": p_id},
        headers=headers,
    )
    response.raise_for_status()
    content = response.json()

    # Raised explicitly instead of through an ``assert`` statement so the
    # check still runs when Python is started with ``-O``.
    if "data" not in content:
        raise AssertionError("JSON-LD metadata response has no 'data' entry")

    return content["data"]


def _update_json_ld_metadata(
    p_id: str,
    data: Dict,
    base_url: str,
    headers: Dict[str, str],
):
    """Updates JSON-LD metadata for a dataset.

    Sends ``data`` as the JSON body of a PUT request to the dataset's
    metadata endpoint with ``replace=true``.

    Args:
        p_id (str): Persistent ID of the dataset.
        data (Dict): The JSON-LD metadata to update.
        base_url (str): URL of the dataverse instance.
        headers (Dict[str, str]): HTTP headers including API token.

    Returns:
        Dict: The response from the server.

    Raises:
        httpx.HTTPError: If the update fails (status code != 200).
    """
    response = httpx.put(
        f"{base_url.rstrip('/')}/api/datasets/:persistentId/metadata",
        # Sent through ``params`` so httpx URL-encodes the identifier instead
        # of it being pasted verbatim into the query string.
        params={"persistentId": p_id, "replace": "true"},
        headers=headers,
        json=data,
    )

    if response.status_code != 200:
        raise httpx.HTTPError(f"Failed to update JSON-LD metadata: {response.text}")

    return response.json()
8 changes: 8 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,11 @@ def minimal_upload_other_license():
Returns the contents of the 'minimal_upload.json' file as a dictionary.
"""
return json.load(open("tests/fixtures/minimal_upload_other_license.json"))


@pytest.fixture()
def minimal_upload_custom_license():
    """
    Returns the contents of the 'minimal_upload_custom_license.json' file as a dictionary.
    """
    # The context manager closes the file handle as soon as parsing is done,
    # instead of leaving it open until garbage collection.
    with open(
        "tests/fixtures/minimal_upload_custom_license.json",
        encoding="utf-8",
    ) as fixture_file:
        return json.load(fixture_file)
84 changes: 84 additions & 0 deletions tests/fixtures/minimal_upload_custom_license.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
{
"datasetVersion": {
"termsOfUse": "This dataset is provided for research and educational purposes only. Commercial use is prohibited without explicit permission from the dataset owner.",
"confidentialityDeclaration": "This dataset contains no confidential or personally identifiable information. All data has been anonymized and aggregated for research purposes.",
"citationRequirements": "When using this dataset, please cite: Doe, John (2024). My dataset. [Dataset]. Available at: http://localhost:8080",
"conditions": "Users must acknowledge the source of the data and agree not to redistribute the dataset without permission. Any publications using this data should include proper attribution.",
"citation": "John Doe, 2025, My dataset, https://doi.org/10.5072/FK2/VJCLOP, Root, V1",
"metadataBlocks": {
"citation": {
"fields": [
{
"multiple": true,
"typeClass": "compound",
"typeName": "author",
"value": [
{
"authorName": {
"multiple": false,
"typeClass": "primitive",
"typeName": "authorName",
"value": "John Doe"
}
}
]
},
{
"multiple": true,
"typeClass": "compound",
"typeName": "datasetContact",
"value": [
{
"datasetContactName": {
"multiple": false,
"typeClass": "primitive",
"typeName": "datasetContactName",
"value": "John Doe"
},
"datasetContactEmail": {
"multiple": false,
"typeClass": "primitive",
"typeName": "datasetContactEmail",
"value": "john@doe.com"
}
}
]
},
{
"multiple": true,
"typeClass": "compound",
"typeName": "dsDescription",
"value": [
{
"dsDescriptionValue": {
"multiple": false,
"typeClass": "primitive",
"typeName": "dsDescriptionValue",
"value": "This is a description of the dataset"
},
"dsDescriptionDate": {
"multiple": false,
"typeClass": "primitive",
"typeName": "dsDescriptionDate",
"value": "2024"
}
}
]
},
{
"multiple": true,
"typeClass": "controlledVocabulary",
"typeName": "subject",
"value": ["Other"]
},
{
"multiple": false,
"typeClass": "primitive",
"typeName": "title",
"value": "My dataset"
}
]
}
}
}
}
Loading