3434from renku .core .dataset .providers .repository import RepositoryImporter , make_request
3535from renku .core .util import communication
3636from renku .core .util .doi import is_doi
37+ from renku .core .util .requests import get_redirect_url
3738from renku .core .util .urls import remove_credentials
3839from renku .domain_model .project_context import project_context
3940
@@ -80,7 +81,9 @@ def supports(uri):
8081 @staticmethod
8182 def get_record_id (uri ):
8283 """Extract record id from URI."""
83- return urlparse (uri ).path .split ("/" )[- 1 ]
84+ parts = urlparse (uri ).path .split ("/" )
85+ parts = [p for p in parts if p .isdigit ()]
86+ return parts [- 1 ]
8487
8588 @staticmethod
8689 def get_export_parameters () -> List ["ProviderParameter" ]:
@@ -121,7 +124,7 @@ def __init__(self, *, uri: str, original_uri, json: Dict[str, Any]):
121124
122125 metadata = self ._json .pop ("metadata" , {})
123126 self ._json ["metadata" ] = ZenodoMetadataSerializer .from_metadata (metadata ) if metadata is not None else None
124- record_id = self ._json .pop ("record_id" , None )
127+ record_id = self ._json .pop ("record_id" , None ) or self . _json . pop ( "recid" , None )
125128 self ._json ["record_id" ] = str (record_id ) if record_id is not None else None
126129
127130 # NOTE: Make sure that these properties have a default value
@@ -136,11 +139,11 @@ def version(self):
136139 @property
137140 def latest_uri (self ):
138141 """Get URI of latest version."""
139- return self ._json ["links" ].get ("latest_html" )
142+ return get_redirect_url ( self ._json ["links" ].get ("latest" ) )
140143
141144 def is_latest_version (self ):
142145 """Check if this record is the latest version."""
143- return ZenodoProvider .get_record_id (self ._json [ "links" ]. get ( "latest_html" ) ) == self ._json ["record_id" ]
146+ return ZenodoProvider .get_record_id (self .latest_uri ) == self ._json ["record_id" ]
144147
145148 def get_jsonld (self ):
146149 """Get record metadata as jsonld."""
@@ -173,18 +176,19 @@ def fetch_provider_dataset(self) -> "ProviderDataset":
173176 from renku .domain_model .dataset import Url , generate_default_slug
174177
175178 class ZenodoDatasetSchema (ProviderDatasetSchema ):
176- """Schema for Dataverse datasets."""
179+ """Schema for Zenodo datasets."""
177180
178181 @pre_load
179182 def fix_data (self , data , ** kwargs ):
180- """Fix data that is received from Dataverse ."""
183+ """Fix data that is received from Zenodo ."""
181184 # Fix context
182185 context = data .get ("@context" )
183186 if context and isinstance (context , str ):
187+ if not context .endswith ("/" ):
188+ context = f"{ context } /"
184189 if context == "https://schema.org/" :
185190 context = "http://schema.org/"
186191 data ["@context" ] = {"@base" : context , "@vocab" : context }
187-
188192 # Add type to creators
189193 creators = data .get ("creator" , [])
190194 for c in creators :
@@ -194,6 +198,10 @@ def fix_data(self, data, **kwargs):
194198 license = data .get ("license" )
195199 if license and isinstance (license , dict ):
196200 data ["license" ] = license .get ("url" , "" )
201+ # fix keywords to be a list
202+ keywords = data .get ("keywords" )
203+ if keywords and isinstance (keywords , str ):
204+ data ["keywords" ] = [k .strip () for k in keywords .split ("," )]
197205
198206 # Delete existing isPartOf
199207 data .pop ("isPartOf" , None )
@@ -228,17 +236,17 @@ def fix_data(self, data, **kwargs):
228236class ZenodoFileSerializer :
229237 """Zenodo record file."""
230238
231- def __init__ (self , * , id = None , checksum = None , links = None , filename = None , filesize = None ):
239+ def __init__ (self , * , id = None , checksum = None , links = None , key = None , size = None , ** kwargs ):
232240 self .id = id
233241 self .checksum = checksum
234242 self .links = links
235- self .filename = filename
236- self .filesize = filesize
243+ self .filename = key
244+ self .filesize = size
237245
238246 @property
239247 def remote_url (self ):
240248 """Get remote URL as ``urllib.ParseResult``."""
241- return urllib .parse .urlparse (self .links ["download " ])
249+ return urllib .parse .urlparse (self .links ["self " ])
242250
243251 @property
244252 def type (self ):
@@ -325,7 +333,10 @@ def from_metadata(cls, metadata: Dict[str, Any]) -> "ZenodoMetadataSerializer":
325333class ZenodoExporter (ExporterApi ):
326334 """Zenodo export manager."""
327335
328- HEADERS = {"Content-Type" : "application/json" }
336+ HEADERS = {
337+ "Content-Type" : "application/json" ,
338+ "Referer" : f"https://{ os .environ .get ('RENKU_DOMAIN' , 'zenodo.org' )} " ,
339+ }
329340
330341 def __init__ (self , dataset , publish , tag ):
331342 super ().__init__ (dataset )
@@ -503,7 +514,9 @@ def publish_deposition(self):
503514 """Publish existing deposition."""
504515 from renku .core .util import requests
505516
506- response = requests .post (url = self .publish_url , params = self .exporter .default_params )
517+ response = requests .post (
518+ url = self .publish_url , params = self .exporter .default_params , headers = self .exporter .HEADERS
519+ )
507520 self ._check_response (response )
508521
509522 return response
@@ -517,14 +530,21 @@ def _check_response(response):
517530 except errors .RequestError :
518531 if response .status_code == 400 :
519532 err_response = response .json ()
520- messages = [
521- '"{}" failed with "{}"' .format (err ["field" ], err ["message" ]) for err in err_response ["errors" ]
522- ]
533+ if "errors" in err_response :
534+ messages = [
535+ '"{}" failed with "{}"' .format (err ["field" ], ", " .join (err ["messages" ]))
536+ for err in err_response ["errors" ]
537+ ]
538+ elif "message" in err_response :
539+ messages = [err_response ["message" ]]
540+ else :
541+ messages = [response .text ()]
523542
524543 raise errors .ExportError (
525544 "\n " + "\n " .join (messages ) + "\n See `renku dataset edit -h` for details on how to edit" " metadata"
526545 )
527546 else :
547+ print (response .status_code )
528548 raise errors .ExportError (response .content )
529549
530550
0 commit comments