Skip to content
This repository has been archived by the owner on Sep 16, 2022. It is now read-only.

Cscfairmeta 847 relax draft validation #740

Merged
merged 3 commits into from
Jan 19, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
239 changes: 239 additions & 0 deletions src/metax_api/api/rest/v2/schemas/dft_dataset_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,45 @@
],
"additionalProperties": true
},
"Checksum":{
"title":"Checksum",
"type":"object",
"@id":"http://spdx.org/rdf/terms#Checksum",
"description":"",
"minProperties":1,
"properties":{
"algorithm":{
"@id":"http://spdx.org/rdf/terms#algorithm",
"title":"Algorithm",
"description":"Used checksum algortihm",
"enum":[
"MD5",
"SHA-1",
"SHA-224",
"SHA-256",
"SHA-384",
"SHA-512",
"OTHER"
],
"@type":"http://www.w3.org/2001/XMLSchema#string",
"minLength":1,
"type":"string"
},
"checksum_value":{
"@id":"http://spdx.org/rdf/terms#checksumValue",
"title":"Checksum value",
"description":"Value of the checksum. xsd:hexBinary",
"@type":"http://www.w3.org/2001/XMLSchema#hexBinary",
"minLength":1,
"type":"string"
}
},
"required":[
"checksum_value",
"algorithm"
],
"additionalProperties": false
},
"Concept":{
"title":"Concept",
"type":"object",
Expand Down Expand Up @@ -482,6 +521,54 @@
"identifier"
]
},
"DistributionCharacteristics":{
"title":"File characteristics",
"type":"object",
"@id":"http://www.loc.gov/premis/rdf/v1#ObjectCharasteristics",
"description":"Technical properties of files",
"minProperties":1,
"properties":{
"title":{
"@id":"http://purl.org/dc/terms/title",
"title":"File Type Name",
"description":"A name of the file type",
"@type":"http://www.w3.org/2001/XMLSchema#string",
"minLength":1,
"type":"string"
},
"description":{
"@id":"http://purl.org/dc/terms/description",
"title":"File Type Description",
"description":"Description of the file type",
"@type":"http://www.w3.org/2001/XMLSchema#string",
"minLength":1,
"type":"string"
},
"application_name":{
"@id":"http://www.loc.gov/premis/rdf/v1#hasCreatingApplicationName",
"title":"Application name",
"description":"A designation for the name of the software program that created the object",
"@type":"http://www.w3.org/2001/XMLSchema#string",
"minLength":1,
"type":"string",
"example":[
"MSWord"
]
},
"encoding":{
"@id":"http://schema.org/encoding",
"title":"Text encoding",
"description":"Used text encoding",
"@type":"http://www.w3.org/2001/XMLSchema#string",
"minLength":1,
"type":"string"
}
},
"required":[
"title"
],
"additionalProperties": false
},
"EntityRelation":{
"title":"Entity relation",
"type":"object",
Expand Down Expand Up @@ -1349,6 +1436,17 @@
"$ref":"#/definitions/Concept"
}
},
"remote_resources":{
"@id":"http://www.w3.org/ns/dcat#distribution",
"title":"Remote resources",
"description":"A concrete storage or expression format for the data in the dataset, for example a file, a database or a query interface to the data.",
"@type":"@id",
"type":"array",
"items":{
"type":"object",
"$ref":"#/definitions/WebResource"
}
},
"access_rights":{
"@id":"http://www.w3.org/ns/dcat#accessRights",
"title":"Access rights",
Expand Down Expand Up @@ -1425,6 +1523,14 @@
"@type":"http://www.w3.org/2001/XMLSchema#integer",
"type":"integer",
"readOnly": true
},
"total_remote_resources_byte_size":{
"@id":"http://uri.suomi.fi/datamodel/ns/mrd#totalremoteresourcesbytesize",
"title":"Total remote resources byte size",
"description":"Sum of all remote resources distribution byte size. Read only field.",
"@type":"http://www.w3.org/2001/XMLSchema#integer",
"type":"integer",
"readOnly": true
}
},
"required":[
Expand Down Expand Up @@ -1611,6 +1717,139 @@
],
"additionalProperties": false
},
"WebResource":{
"title":"Remote Web Resource",
"type":"object",
"@id":"http://www.w3.org/ns/dcat#Distribution",
"description":"Represents single available resource in the web.",
"minProperties":1,
"properties":{
"identifier":{
"@id":"http://purl.org/dc/terms/identifier",
"title":"Identifier",
"description":"Unique identifier for the distribution from file: scheme",
"@type":"@id",
"minLength":1,
"type":"string",
"format":"uri",
"example":[
"file://etc/data/abc.txt"
]
},
"title":{
"@id":"http://purl.org/dc/terms/title",
"title":"Title",
"description":"A name given to the distribution.",
"@type":"http://www.w3.org/2001/XMLSchema#string",
"minLength":1,
"type":"string"
},
"description":{
"@id":"http://purl.org/dc/terms/description",
"title":"Description",
"description":"Free-text account of the distribution.",
"@type":"http://www.w3.org/2001/XMLSchema#string",
"minLength":1,
"type":"string"
},
"modified":{
"@id":"http://purl.org/dc/terms/modified",
"title":"Date Modified",
"description":"Date on which the resource was changed.",
"@type":"http://www.w3.org/2001/XMLSchema#dateTime",
"minLength":1,
"type":"string",
"format":"date-time"
},
"byte_size":{
"@id":"http://www.w3.org/ns/dcat#byteSize",
"title":"Byte size",
"description":"The size of a distribution in bytes.",
"@type":"http://www.w3.org/2001/XMLSchema#integer",
"type":"integer"
},
"access_url":{
"@id":"http://www.w3.org/ns/dcat#accessURL",
"title":"Access URL",
"description":"Could be any kind of URL that gives access to a distribution of the dataset. E.g. landing page download, feed URL, SPARQL endpoint. Use when your catalog does not have information on which it is or when it is definitely not a download.",
"@type":"@id",
"type":"object",
"$ref":"#/definitions/Document"
},
"download_url":{
"@id":"http://www.w3.org/ns/dcat#downloadURL",
"title":"Download URL",
"description":"This is a direct link to a downloadable file in a given format. E.g. CSV file or RDF file. The format is described by the distribution's dc:format and/or dcat:mediaType",
"@type":"@id",
"type":"object",
"$ref":"#/definitions/Document"
},
"checksum":{
"@id":"http://spdx.org/rdf/terms#checksum",
"title":"Checksum",
"description":"Checksum of the manifestation",
"@type":"@id",
"type":"object",
"$ref":"#/definitions/Checksum"
},
"license":{
"@id":"http://purl.org/dc/terms/license",
"title":"License",
"description":"A legal document giving official permission to do something with the resource.",
"@type":"@id",
"type":"array",
"items":{
"type":"object",
"$ref":"#/definitions/ResearchDataLicenseDocument"
}
},
"file_type":{
"@id":"http://purl.org/dc/terms/type",
"title":"File type",
"description":"File type from reference data",
"@type":"@id",
"type":"object",
"$ref":"#/definitions/Concept"
},
"resource_type":{
"@id":"http://uri.suomi.fi/datamodel/ns/mrd#resourcetype",
"title":"Resource type",
"description":"type of the resource",
"@type":"@id",
"type":"object",
"$ref":"#/definitions/Concept"
},
"use_category":{
"@id":"http://uri.suomi.fi/datamodel/ns/mrd#usetype",
"title":"Use category",
"description":"Use type of the resource",
"@type":"@id",
"type":"object",
"$ref":"#/definitions/Concept"
},
"has_object_characteristics":{
"@id":"http://www.loc.gov/premis/rdf/v1#hasObjectCharacteristics",
"title":"File charasteristics",
"description":"Fileformat specific charasteristics",
"@type":"@id",
"type":"object",
"$ref":"#/definitions/DistributionCharacteristics"
},
"mediatype":{
"@id":"http://purl.org/dc/terms/format",
"title":"File format",
"description":"Mediatype as text from https://www.iana.org/assignments/media-types/",
"@type":"http://www.w3.org/2001/XMLSchema#string",
"minLength": 1,
"type":"string"
}
},
"required":[
"use_category",
"title"
],
"additionalProperties": false
},
"langString":{
"type":"object",
"minProperties": 1,
Expand Down
18 changes: 18 additions & 0 deletions src/metax_api/api/rest/v2/serializers/catalog_record_serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,3 +126,21 @@ def validate_research_dataset_files(self, value):
raise ValidationError({ 'detail':
['%s. Json path: %s. Schema: %s' % (e.message, [p for p in e.path], e.schema)]
})

def _set_dataset_schema(self):
    """
    Select the JSON schema used to validate the dataset.

    When self._validate_as_draft() is truthy, the 'dft'-prefixed 'dataset' schema is
    fetched through CS.get_json_schema from self._schemas_directory_path and stored in
    self.json_schema. Otherwise the parent implementation is left to set the schema.
    """
    if not self._validate_as_draft():
        super()._set_dataset_schema()
        return

    # drafts only exist for V2 records, otherwise normal rules apply
    self.json_schema = CS.get_json_schema(self._schemas_directory_path, 'dataset', 'dft')

def _validate_as_draft(self):
    """
    Decide whether the dataset is validated against the draft schema.

    - On create, when the serializer context carries a "request", the result is whatever
      CS.get_boolean_query_param returns for that request's 'draft' parameter.
    - Otherwise, when the request stored on the instance is a create operation, the
      instance's own _save_as_draft() decides.
    - In every remaining case the method returns True.
    """
    created_via_request = self._operation_is_create and "request" in self.context
    if created_via_request:
        return CS.get_boolean_query_param(self.context["request"], 'draft')

    if not CS.request_is_create_operation(self.instance.request):
        return True

    return self.instance._save_as_draft()
13 changes: 10 additions & 3 deletions src/metax_api/models/catalog_record_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

from django.conf import settings
from django.db.models import Q
from rest_framework.serializers import ValidationError

from metax_api.exceptions import Http400, Http403
from metax_api.utils import (
Expand Down Expand Up @@ -285,6 +286,15 @@ def publish_dataset(self, pid_type=None):
if self.api_version != self.api_meta['version']:
self._set_api_version()

try:
# drafts are validated with different schema until the publication so check that final result is valid
# according to the actual data catalog.
serializer = self.serializer_class(self, context={'request': self.request}, data=self._initial_data)
serializer.validate_research_dataset(self.research_dataset)
except ValidationError as e:
# apierrors couldn't handle the validation error thrown by the serializer
raise Http400(e)

super(Common, self).save()

_logger.info(
Expand Down Expand Up @@ -716,9 +726,6 @@ def change_files(self, file_changes, operation_is_create=False):
_logger.debug('Received data does not include files or directories - returning')
return

if self.catalog_is_dft():
raise Http400('Adding files in draft catalog is not permitted. Please select valid datacatalog first.')

# create an instance of the serializer for later validations
serializer = self.serializer_class(self)

Expand Down
4 changes: 4 additions & 0 deletions src/metax_api/services/common_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,10 @@ def _request_has_header(request, header_name):
def _request_is_write_operation(request):
return request.method in ('POST', 'PUT', 'PATCH', 'DELETE')

@staticmethod
def request_is_create_operation(request):
    """
    Return True when the request is a create operation, i.e. its HTTP method is exactly 'POST'.

    An equality check is used deliberately: ('POST') is a parenthesized string, not a
    one-element tuple, so testing it with `in` is a substring match that also accepts
    values such as '' or 'PO'.
    """
    return request.method == 'POST'

@classmethod
def check_if_unmodified_since(cls, request, obj):
if cls._request_is_write_operation(request) and \
Expand Down
18 changes: 13 additions & 5 deletions src/metax_api/tests/api/rest/v2/views/datasets/drafts.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ def setUp(self):
catalog_json=catalog_json,
date_created=get_tz_aware_now_without_micros(),
catalog_record_services_create='testuser,api_auth_user,metax',
catalog_record_services_edit='testuser,api_auth_user,metax'
catalog_record_services_edit='testuser,api_auth_user,metax',
catalog_record_services_read='testuser,api_auth_user,metax'
)

self.minimal_draft = {
Expand Down Expand Up @@ -380,8 +381,9 @@ def test_minimal_draft_dataset_creation(self):
response = self.client.post('/rest/v2/datasets?draft', self.minimal_draft, format="json")
self.assertEqual(response.status_code, status.HTTP_201_CREATED, response.data)

def test_no_files_or_dirs_in_draft_catalog(self):
''' Files cannot be added to datasets that are in draft catalog '''
def test_allow_files_and_dirs_in_draft_catalog(self):
''' Files can be added to datasets that are in draft catalog '''
self._use_http_authorization(method='basic', username='metax')

for type in ['files', 'directories']:
self.minimal_draft['research_dataset'][type] = [
Expand All @@ -391,8 +393,11 @@ def test_no_files_or_dirs_in_draft_catalog(self):
]

response = self.client.post('/rest/v2/datasets?draft', self.minimal_draft, format="json")
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST, response.data)
self.assertTrue('files in draft catalog' in response.data['detail'][0], response.data)
self.assertEqual(response.status_code, status.HTTP_201_CREATED, response.data)

response = self.client.get(f'/rest/v2/datasets/{response.data["id"]}/files', format="json")
self.assertEqual(response.status_code, status.HTTP_200_OK, response.data)
self.assertTrue(response.data, response.data)

self.minimal_draft['research_dataset'].pop(type)

Expand Down Expand Up @@ -421,6 +426,9 @@ def test_prevent_update_published_dataset_to_draft_catalog(self):
response = self.client.put(f'/rest/v2/datasets/{cr["id"]}', cr, format="json")
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST, response.data)

def test_allow_remote_resources_in_ida_for_drafts(self):
"""
When dataset is in draft state, it should be validated with """
class CatalogRecordDraftsOfPublished(CatalogRecordApiWriteCommon):

"""
Expand Down
Loading