Skip to content
This repository has been archived by the owner on Sep 16, 2022. It is now read-only.

Commit

Permalink
Production update 22/03/1
Browse files Browse the repository at this point in the history
  • Loading branch information
tonurmi committed Feb 25, 2022
2 parents c8d7f1d + ceac2ae commit 139cce7
Show file tree
Hide file tree
Showing 13 changed files with 439 additions and 87 deletions.
104 changes: 44 additions & 60 deletions src/metax_api/api/oaipmh/base/metax_oai_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@
# :license: MIT

import datetime
from urllib import parse

from django.conf import settings
from django.db.models import QuerySet
from django.utils import timezone
from oaipmh import common
from oaipmh.common import ResumptionOAIPMH
Expand Down Expand Up @@ -62,13 +64,9 @@ def _get_default_set_filter():
return catalog_urns

def _get_urnresolver_record_data(self, set, cursor, batch_size, from_=None, until=None):
proxy = CatalogRecord
if set == DATACATALOGS_SET:
proxy = DataCatalog

# Use unfiltered objects for fetching catalog records for urn resolver, since otherwise deleted objects
# won't appear in the result. Get only active objects.
records = proxy.objects_unfiltered.filter(active=True)
records = CatalogRecord.objects_unfiltered.filter(active=True, state="published")

if from_ and until:
records = records.filter(date_modified__gte=from_, date_modified__lte=until)
Expand Down Expand Up @@ -109,19 +107,19 @@ def _get_urnresolver_record_data(self, set, cursor, batch_size, from_=None, unti
def _get_filtered_records_data(
self, verb, metadata_prefix, set, cursor, batch_size, from_=None, until=None
):
proxy = CatalogRecord
query_set: QuerySet
if set == DATACATALOGS_SET:
proxy = DataCatalog

# For NON urn resolver, only get non-deleted active objects
query_set = proxy.objects.all()
query_set = DataCatalog.objects.all()
else:
# For NON urn resolver, only get non-deleted active CatalogRecords
query_set = CatalogRecord.objects.filter(active=True, state="published")

if from_ and until:
query_set = proxy.objects.filter(date_modified__gte=from_, date_modified__lte=until)
query_set = query_set.filter(date_modified__gte=from_, date_modified__lte=until)
elif from_:
query_set = proxy.objects.filter(date_modified__gte=from_)
query_set = query_set.filter(date_modified__gte=from_)
elif until:
query_set = proxy.objects.filter(date_modified__lte=until)
query_set = query_set.filter(date_modified__lte=until)

if set:
if set == DATACATALOGS_SET:
Expand All @@ -134,7 +132,6 @@ def _get_filtered_records_data(
query_set = query_set.filter(
data_catalog__catalog_json__identifier__in=self._get_default_set_filter()
)
query_set = query_set.filter(state="published")

data = []
for record in query_set:
Expand Down Expand Up @@ -171,16 +168,22 @@ def _get_filtered_records_data(
cursor_end = cursor + batch_size if cursor + batch_size < len(data) else len(data)
return data[cursor:cursor_end]

def _get_syke_urnresolver_metadata_for_record(self, record):
    """Build urn-resolver metadata entries for a SYKE-harvested record.

    Looks for the first other_identifier whose notation is a brace-wrapped
    GUID (SYKE convention) and maps it to a resolver entry pairing the
    SYKE landing-page URL with the dataset's preferred_identifier.

    :param record: dict with a "research_dataset" key (values() query result)
    :return: list with at most one {"identifier": [syke_url, pref_id]} dict
    """
    metadatas = []
    pref_id = record["research_dataset"].get("preferred_identifier")
    for id_obj in record["research_dataset"].get("other_identifier", []):
        # SYKE dataset ids are GUID-like notations wrapped in braces, e.g. "{...}"
        if id_obj.get("notation", "").startswith("{"):
            uuid = id_obj["notation"]
            metadatas.append(
                {
                    "identifier": [
                        # quote() makes the braces URL-safe in the SYKE link
                        SYKE_URL_PREFIX_TEMPLATE % parse.quote(uuid),
                        pref_id,
                    ]
                }
            )
            # only the first matching notation is used
            break
    return metadatas

def _get_oai_dc_urnresolver_metadatas_for_record(self, record):
"""
Expand All @@ -197,23 +200,10 @@ def _get_oai_dc_urnresolver_metadatas_for_record(self, record):
pref_id = record["research_dataset"].get("preferred_identifier")
dc_id = record["data_catalog__catalog_json"].get("identifier")
is_harvested = record["data_catalog__catalog_json"].get("harvested", False) is True
if record["research_dataset"].get("other_identifier") is not None:
other_ids = record["research_dataset"].get("other_identifier")
else:
other_ids = []
other_ids = record["research_dataset"].get("other_identifier", [])

if dc_id == "urn:nbn:fi:att:data-catalog-harvest-syke":
for id_obj in other_ids:
if id_obj.get("notation", "").startswith("{"):
metadatas.append(
{
"identifier": [
SYKE_URL_PREFIX_TEMPLATE % id_obj["notation"],
pref_id,
]
}
)
break
metadatas.extend(self._get_syke_urnresolver_metadata_for_record(record))

elif dc_id not in settings.LEGACY_CATALOGS:
resolution_url = settings.OAI["ETSIN_URL_TEMPLATE"] % record["identifier"]
Expand All @@ -235,6 +225,17 @@ def _get_oaic_dc_value(self, value, lang=None):
valueDict["lang"] = lang
return valueDict

def _get_actors_oaic_dc_value(self, actors):
    """Collect oai_dc value dicts for the names of the given actors.

    A multilingual name (dict of lang -> value) produces one entry per
    language; a plain name produces a single entry without a language.
    Actors lacking a "name" key are skipped.
    """
    names = []
    for actor in actors:
        if "name" not in actor:
            continue
        name = actor["name"]
        if isinstance(name, dict):
            names.extend(
                self._get_oaic_dc_value(value, lang) for lang, value in name.items()
            )
        else:
            names.append(self._get_oaic_dc_value(name))
    return names

def _get_oai_dc_metadata(self, record, json):
identifier = []
if "preferred_identifier" in json:
Expand All @@ -247,15 +248,7 @@ def _get_oai_dc_metadata(self, record, json):
for key, value in title_data.items():
title.append(self._get_oaic_dc_value(value, key))

creator = []
creator_data = json.get("creator", [])
for value in creator_data:
if "name" in value:
if isinstance(value["name"], dict):
for key, val in value["name"].items():
creator.append(self._get_oaic_dc_value(val, key))
else:
creator.append(self._get_oaic_dc_value(value.get("name")))
creator = self._get_actors_oaic_dc_value(json.get("creator", []))

subject = []
subject_data = json.get("keyword", [])
Expand All @@ -279,20 +272,9 @@ def _get_oai_dc_metadata(self, record, json):
else:
desc.append(desc_data)

publisher = []
publisher_data = json.get("publisher", {})
for key, value in publisher_data.get("name", {}).items():
publisher.append(self._get_oaic_dc_value(value, key))

contributor = []
contributor_data = json.get("contributor", [])
for value in contributor_data:
if "name" in value:
if isinstance(value["name"], dict):
for key, val in value["name"].items():
contributor.append(self._get_oaic_dc_value(val, key))
else:
contributor.append(self._get_oaic_dc_value(value.get("name")))
publisher = self._get_actors_oaic_dc_value([json.get("publisher", {})])

contributor = self._get_actors_oaic_dc_value([json.get("contributor", [])])

date = self._get_oaic_dc_value(str(record.date_created))

Expand Down Expand Up @@ -429,7 +411,9 @@ def identify(self):
"""Implement OAI-PMH verb Identify ."""
first = (
CatalogRecord.objects.filter(
data_catalog__catalog_json__identifier__in=self._get_default_set_filter()
active=True,
state="published",
data_catalog__catalog_json__identifier__in=self._get_default_set_filter(),
)
.order_by("date_created")
.values_list("date_created", flat=True)
Expand Down
50 changes: 50 additions & 0 deletions src/metax_api/management/commands/create_missing_rems_items.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import logging

from django.core.management.base import BaseCommand

from metax_api.services.redis_cache_service import RedisClient
from metax_api.utils import ReferenceDataLoader
from metax_api.models import CatalogRecordV2
from metax_api.services.rems_service import REMSCatalogItemNotFoundException, REMSService

_logger = logging.getLogger(__name__)

class Command(BaseCommand):
    """Create missing REMS catalogue items for CatalogRecords.

    Scans all CatalogRecords that carry a rems_identifier, checks whether
    the corresponding REMS entity exists, and creates it when missing.
    Records without an access_granter or without a license are skipped,
    since a REMS entity cannot be created for them.
    """

    help = "Create missing REMS entities for CatalogRecords with a rems_identifier."

    def handle(self, *args, **options):
        rems_service = REMSService()
        found_entity_count = 0
        created_entity_count = 0
        missing_entity_count = 0
        try:
            rems_crs = CatalogRecordV2.objects.filter(rems_identifier__isnull=False)
            _logger.info(f"Found {len(rems_crs)} CatalogRecords with rems_identifiers")
            for cr in rems_crs:
                try:
                    rems_service.get_rems_entity(cr)
                    found_entity_count += 1
                except REMSCatalogItemNotFoundException:
                    missing_entity_count += 1
                    # REMS requires an access granter; skip records without one.
                    if not cr.access_granter:
                        _logger.info(
                            f"Missing access_granter for {cr.identifier}, not creating REMS entity"
                        )
                        continue
                    # REMS catalogue items need at least one license.
                    if len(cr.research_dataset.get("access_rights", {}).get("license") or []) == 0:
                        _logger.info(
                            f"Missing license for {cr.identifier}, not creating REMS entity"
                        )
                        continue

                    _logger.info(
                        f"REMS entity {cr.rems_identifier} for dataset {cr.identifier} not found, creating"
                    )
                    rems_service.create_rems_entity(cr, cr.access_granter)
                    created_entity_count += 1

        except Exception as e:
            # Log, then re-raise with the original traceback so the
            # management command exits non-zero (bare raise, not `raise e`).
            _logger.error(e)
            raise

        _logger.info(f"CatalogRecords with existing REMS entities: {found_entity_count}")
        _logger.info(f"Missing REMS entities: {missing_entity_count}")
        _logger.info(f"Created REMS entities: {created_entity_count}")
58 changes: 58 additions & 0 deletions src/metax_api/migrations/0046_replace_dataset_owner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
from django.db import migrations

import logging

logger = logging.getLogger(__name__)

def replace_metadata_provider_user(cr, old_user, new_user):
    """Swap cr.metadata_provider_user from old_user to new_user in memory.

    Only mutates the record when its current value is set and equals
    old_user; the caller is responsible for saving the record.
    """
    logger.info(f"replacing metadata_provider_user: {old_user} with new_user: {new_user}")
    current = cr.metadata_provider_user
    if current and current == old_user:
        cr.metadata_provider_user = new_user
        logger.info("metadata_provider_user changed")

def change_metadata_provider_user(apps, schema_editor):
    """Forward migration: reassign ownership of catalog records.

    Replaces metadata_provider_user "[email protected]" with "frickmar"
    on every matching CatalogRecord, and updates the editor permissions
    to the new user id. Failures are logged per record and skipped so one
    bad record does not abort the whole migration.
    """
    new_user = "frickmar"
    old_user = "[email protected]"
    # Use the historical model, as required inside migrations.
    CatalogRecord = apps.get_model('metax_api', 'CatalogRecord')
    crs = CatalogRecord.objects.filter(metadata_provider_user=old_user)
    logger.info(f"Found {len(crs)} catalog records to update")
    for cr in crs:
        try:
            logger.info(f"changing metadata_provider_user for cr {cr.identifier}")
            replace_metadata_provider_user(cr, old_user, new_user)
            cr.save()
            # Keep edit rights in sync with the new provider user.
            cr.editor_permissions.users.update(user_id=new_user)
            logger.info("cr save successful")
        except Exception as e:
            # Best effort: log and continue with the remaining records.
            logger.error(e)


def revert(apps, schema_editor):
    """Reverse migration: restore the original metadata_provider_user.

    Mirror image of change_metadata_provider_user — swaps "frickmar"
    back to "[email protected]" and restores editor permissions.
    Failures are logged per record and skipped.
    """
    new_user = "frickmar"
    old_user = "[email protected]"
    # Use the historical model, as required inside migrations.
    CatalogRecord = apps.get_model('metax_api', 'CatalogRecord')
    crs = CatalogRecord.objects.filter(metadata_provider_user=new_user)
    logger.info(f"Found {len(crs)} catalog records to update")
    for cr in crs:
        try:
            logger.info(f"changing metadata_provider_user for cr {cr.identifier}")
            replace_metadata_provider_user(cr, new_user, old_user)
            cr.save()
            # Restore edit rights for the original provider user.
            cr.editor_permissions.users.update(user_id=old_user)
            logger.info("cr save successful")
        except Exception as e:
            # Best effort: log once and continue (original logged twice by mistake).
            logger.error(e)

class Migration(migrations.Migration):
    # Data migration: replaces metadata_provider_user
    # "[email protected]" with "frickmar" on matching catalog records.

    dependencies = [
        ('metax_api', '0045_add_publish_fields_to_catalogs'),
    ]

    operations = [
        # RunPython(forward, reverse): `revert` restores the old username,
        # making this migration reversible.
        migrations.RunPython(change_metadata_provider_user, revert),
    ]
40 changes: 27 additions & 13 deletions src/metax_api/services/datacite_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -447,16 +447,27 @@ def _subjects(concept):
def _licenses(access_rights):
licenses = []
for license in access_rights["license"]:
for lang in license["title"].keys():
# If only URL of the license is submitted. Use "en" as language
# and set rights and rightsURI to the URL
if len(license) == 1:
licenses.append(
{
"lang": lang,
"rightsURI": license["license"]
if "license" in license
else license["identifier"],
"rights": license["title"][lang],
"lang": "en",
"rightsURI": license["license"],
"rights": license["license"]
}
)
else:
for lang in license["title"].keys():
licenses.append(
{
"lang": lang,
"rightsURI": license["license"]
if "license" in license
else license["identifier"],
"rights": license["title"][lang],
}
)
return licenses

@staticmethod
Expand All @@ -470,12 +481,15 @@ def _spatials(spatials):

for wkt in spatial.get("as_wkt", []):
if wkt.startswith("POINT"):
geo_location["geoLocationPoint"] = {
"pointLongitude": float(
re.search(r"POINT\((.*) ", wkt, re.IGNORECASE).group(1)
),
"pointLatitude": float(re.search(r" (.*)\)", wkt, re.IGNORECASE).group(1)),
point = wkt.strip("POINT()")
point_list = point.strip().split(" ")
longitude = point_list[0]
latitude = point_list[1]
polygon_point = {
"pointLongitude": float(longitude.strip("()")),
"pointLatitude": float(latitude.strip("()")),
}
geo_location["geoLocationPoint"] = polygon_point
# only one point can be placed
break

Expand All @@ -486,8 +500,8 @@ def _spatials(spatials):
for point in polygon.split(","):
longitude, latitude = point.strip().split(" ")
polygon_point = {
"pointLongitude": float(longitude),
"pointLatitude": float(latitude),
"pointLongitude": float(longitude.strip("()")),
"pointLatitude": float(latitude.strip("()")),
}
geo_location["geoLocationPolygon"]["polygonPoints"].append(
polygon_point
Expand Down
Loading

0 comments on commit 139cce7

Please sign in to comment.