
Version 2.7.5 Update 22/06/01
tonurmi committed Jun 6, 2022
1 parent 0044867 commit 21c2cf9
Showing 16 changed files with 836 additions and 300 deletions.
31 changes: 31 additions & 0 deletions src/metax_api/management/commands/delete_removed_datasets.py
@@ -0,0 +1,31 @@
import logging

from django.core.management.base import BaseCommand
from django.forms.models import model_to_dict

from metax_api.models import CatalogRecord, DataCatalog

logger = logging.getLogger(__name__)


class Command(BaseCommand):
def handle(self, *args, **options):
logger.info(f"{options=}")
data_catalog = DataCatalog.objects.get(catalog_json__identifier=options["data_catalog_identifier"])
del_limit = options["del_limit"]
crs = CatalogRecord.objects_unfiltered.filter(data_catalog=data_catalog, removed=True)
logger.info(f"found {crs.count()} removed datasets")
logger.info(f"Will delete {del_limit} datasets at most")
deleted = 0
for cr in crs:
logger.info(f"deleting CatalogRecord: {model_to_dict(cr)}")
cr.delete(hard=True)
deleted += 1
            # --del-limit is optional; treat a missing limit as "no limit" so the
            # comparison cannot fail on None.
            if del_limit is not None and deleted >= del_limit:
                break

logger.info(f"hard deleted {deleted} datasets")

def add_arguments(self, parser):
parser.add_argument("data_catalog_identifier", type=str,
help="Identifier of the data catalog where the datasets are deleted")
parser.add_argument("--del-limit", type=int, help="Max number of datasets to delete")
24 changes: 19 additions & 5 deletions src/metax_api/management/commands/load_data_to_TTV.py
@@ -7,9 +7,10 @@

import logging

+from django.conf import settings as django_settings
from django.core.management.base import BaseCommand

-from metax_api.models import CatalogRecord
+from metax_api.models import CatalogRecord, DataCatalog
from metax_api.services import RabbitMQService

logger = logging.getLogger(__name__)
@@ -30,23 +31,36 @@ def __init__(self, user):

class Command(BaseCommand):

help = "Upload all existing and removed catalog records to TTV's RabbitMQ queue"
help = """Upload all existing non-pas and preservation_state = 120 catalog records with routing_key create
and removed catalog records with routing_key delete to TTV's RabbitMQ queue"""

def handle(self, *args, **options):
user = User()
request = Request(user)
context = {"request": request}

aff_rows = 0
catalog_records = CatalogRecord.objects.filter(state="published")
pas_catalog = DataCatalog.objects.get(catalog_json__identifier=django_settings.PAS_DATA_CATALOG_IDENTIFIER)
catalog_records = CatalogRecord.objects.filter(state="published", data_catalog=pas_catalog, preservation_state=CatalogRecord.PRESERVATION_STATE_IN_PAS)
for catalog_record in catalog_records:
serializer = catalog_record.serializer_class
cr_json = serializer(catalog_record, context=context).data
cr_json["data_catalog"] = {"catalog_json": catalog_record.data_catalog.catalog_json}

RabbitMQService.publish(cr_json, routing_key="create", exchange="TTV-datasets")
aff_rows += 1
logger.info(f"Published {aff_rows} records to exchange: TTV-datasets, routing_key: create")
logger.info(f"Published {aff_rows} PAS catalog records to exchange: TTV-datasets, routing_key: create")

+        aff_rows = 0
+        catalog_records = CatalogRecord.objects.filter(state="published").exclude(data_catalog=pas_catalog)
+        for catalog_record in catalog_records:
+            serializer = catalog_record.serializer_class
+            cr_json = serializer(catalog_record, context=context).data
+            cr_json["data_catalog"] = {"catalog_json": catalog_record.data_catalog.catalog_json}
+
+            RabbitMQService.publish(cr_json, routing_key="create", exchange="TTV-datasets")
+            aff_rows += 1
+        logger.info(
+            f"Published {aff_rows} non-PAS catalog records to exchange: TTV-datasets, routing_key: create"
+        )

aff_rows = 0
removed_catalog_records = CatalogRecord.objects_unfiltered.filter(removed=True)
@@ -57,7 +71,7 @@ def handle(self, *args, **options):

RabbitMQService.publish(cr_json, routing_key="delete", exchange="TTV-datasets")
aff_rows += 1
logger.info(f"Published {aff_rows} records to exchange: TTV-datasets, routing_key: delete")
logger.info(f"Published {aff_rows} removed records to exchange: TTV-datasets, routing_key: delete")



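The command is run as before; it now makes three publish passes: PAS records with preservation_state 120 (routing_key create), other published records (routing_key create), and removed records (routing_key delete). A minimal sketch, assuming a configured Django environment:

# Equivalent to: python manage.py load_data_to_TTV
from django.core.management import call_command

call_command("load_data_to_TTV")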
33 changes: 33 additions & 0 deletions src/metax_api/migrations/0050_remove_previous_dataset_version_from_deleted_datasets.py
@@ -0,0 +1,33 @@
import logging

from django.db import migrations

from metax_api.models import CatalogRecord as CRM

logger = logging.getLogger(__name__)


def remove_previous_dataset_version_from_deleted_datasets(apps, schema_editor):
logger.info("")
logger.info("Removing previous dataset versions information from deleted datasets")
CatalogRecord = apps.get_model('metax_api', 'CatalogRecord')
    crs = CatalogRecord.objects.filter(
        state=CRM.STATE_PUBLISHED, previous_dataset_version__isnull=False, removed=True
    )
for cr in crs:
logger.info(f"Applying migration to catalog record: {cr}")
cr.previous_dataset_version = None
cr.save()
logger.info(f"Applied migration to {len(crs)} catalog record(s)")

def revert(apps, schema_editor):
pass



class Migration(migrations.Migration):

dependencies = [
('metax_api', '0049_auto_20220413_0952'),
]

operations = [
migrations.RunPython(remove_previous_dataset_version_from_deleted_datasets, revert),
]
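
A sketch of applying and rolling back the data migration; the reverse operation is a deliberate no-op, so rolling back only unmarks the migration and leaves the data as-is:

# python manage.py migrate metax_api 0050
# python manage.py migrate metax_api 0049_auto_20220413_0952  # rollback (no data changes)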
58 changes: 58 additions & 0 deletions src/metax_api/migrations/0051_organization_update_luke.py
@@ -0,0 +1,58 @@
import json
import logging

from django.db import migrations

logger = logging.getLogger(__name__)


def replace_org(obj, old_org_id, new_org_id):

    def decode_dict(a_dict):
        old_org_value = f"http://uri.suomi.fi/codelist/fairdata/organization/code/{old_org_id}"
        new_org_value = f"http://uri.suomi.fi/codelist/fairdata/organization/code/{new_org_id}"
        for key, value in a_dict.items():
            try:
                a_dict[key] = value.replace(old_org_value, new_org_value)
            except AttributeError:
                # Non-string values (dicts, lists, numbers) are left untouched.
                pass
        return a_dict

    # Round-tripping through JSON runs decode_dict on every nested dict, so the
    # replacement reaches arbitrarily deep into the structure.
    return json.loads(json.dumps(obj), object_hook=decode_dict)


def update_luke_datasets(apps, schema_editor):
logger.info("")


new_org_id = "4100010"
old_org_id = "02446292"
old_org_obj = f"\"identifier\": \"http://uri.suomi.fi/codelist/fairdata/organization/code/{old_org_id}\""

CatalogRecord = apps.get_model('metax_api', 'CatalogRecord')
crs = CatalogRecord.objects.filter(research_dataset__icontains=old_org_obj)

logger.info(f"Changing organization id from: {old_org_id} to: {new_org_id} on {len(crs)} dataset(s)")
for cr in crs:
logger.info(f"Applying migration to catalog record: {cr}")
new_rd = replace_org(cr.research_dataset, old_org_id, new_org_id)
cr.research_dataset = new_rd
cr.save()
logger.info(f"Applied migration to {len(crs)} catalog record(s)")

def revert(apps, schema_editor):
pass



class Migration(migrations.Migration):

dependencies = [
('metax_api', '0050_remove_previous_dataset_version_from_deleted_datasets'),
]

operations = [
migrations.RunPython(update_luke_datasets, revert),
]
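
To illustrate the JSON round trip above, a minimal sketch with hypothetical research_dataset content (replace_org as defined in this migration):

rd = {
    "creator": [{
        "@type": "Organization",
        "identifier": "http://uri.suomi.fi/codelist/fairdata/organization/code/02446292",
    }]
}
updated = replace_org(rd, "02446292", "4100010")
# decode_dict runs for every nested dict during json.loads, so
# updated["creator"][0]["identifier"] now ends in .../code/4100010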
128 changes: 128 additions & 0 deletions src/metax_api/migrations/0052_organization_update_tuni.py
@@ -0,0 +1,128 @@
import json
import logging

from django.db import migrations

logger = logging.getLogger(__name__)


def is_obj_sub_org(obj, old_sub_org_value):
    # Identify dict-shaped Organization objects that mention the sub-organization
    # URL prefix (e.g. ".../code/01905-"); plain values raise AttributeError in
    # the caller and fall through.
    if isinstance(obj, list):
        return False
    if obj.get("@type", None) != "Organization":
        return False
    return old_sub_org_value in str(obj)


def replace_sub_org_objs(obj, sub_org_pattern, replacements):
    def decode_dict(a_dict):
        for key, value in a_dict.items():
            try:
                if is_obj_sub_org(value, sub_org_pattern):
                    value_as_str = json.dumps(value)
                    new_value_as_str = value_as_str
                    # Appending the closing quote anchors each replacement to the
                    # end of a JSON string value: ".../code/01905" is rewritten,
                    # a longer id such as ".../code/01905-H10" is not.
                    for replacement in replacements:
                        new_value_as_str = new_value_as_str.replace(replacement[0] + '"', replacement[1] + '"')
                    new_value = json.loads(new_value_as_str)
                    a_dict[key] = new_value
            except AttributeError:
                pass
        return a_dict

    return json.loads(json.dumps(obj), object_hook=decode_dict)



def replace_org(obj, old_org_value, new_org_value, old_en_name, new_en_name):

def decode_dict(a_dict):
for key, value in a_dict.items():
try:
new_value = value
# To prevent updating any fields accidentally, change the value
# only if the old value matches the old name or the old org value
if value == old_en_name:
new_value = new_value.replace(old_en_name, new_en_name)
elif value == old_org_value:
new_value = new_value.replace(old_org_value, new_org_value)

a_dict[key] = new_value
except AttributeError:
pass
return a_dict

return json.loads(json.dumps(obj), object_hook=decode_dict)


def update_tuni_datasets(apps, schema_editor):
logger.info("")

old_en_name = "University of Tampere"
new_en_name = "Tampere University"
new_old_en_name = "University of Tampere (-2018)"
tmp_en_name = "Temporary organization name 123-cba-en"

old_fi_name = "Tampereen yliopisto"
new_fi_name = "Tampereen yliopisto"
new_old_fi_name = "Tampereen yliopisto (-2018)"
tmp_fi_name = "Temporary organization name 123-cba-fi"

old_sv_name = "Tammerfors universitet"
new_sv_name = "Tammerfors universitet"
new_old_sv_name = "Tammerfors universitet (-2018)"
tmp_sv_name = "Temporary organization name 123-cba-sv"

old_org_id = "01905"
new_org_id = "10122"
tmp_org_prefix = "tmp-org-123-cba-"
url_prefix = "http://uri.suomi.fi/codelist/fairdata/organization/code/"

old_org_sql = f"\"identifier\": \"{url_prefix}{old_org_id}\""
old_org_value = f"{url_prefix}{old_org_id}"
sub_org_pattern = f"{url_prefix}{old_org_id}-"
tmp_org_value = f"{url_prefix}{tmp_org_prefix}{old_org_id}"
new_org_value = f"{url_prefix}{new_org_id}"

    tmp_replacements = [
        [old_org_value, tmp_org_value],
        [old_en_name, tmp_en_name],
        [old_fi_name, tmp_fi_name],
        [old_sv_name, tmp_sv_name],
    ]
    new_replacements = [
        [tmp_org_value, old_org_value],
        [tmp_en_name, new_old_en_name],
        [tmp_fi_name, new_old_fi_name],
        [tmp_sv_name, new_old_sv_name],
    ]

CatalogRecord = apps.get_model('metax_api', 'CatalogRecord')
crs = CatalogRecord.objects.filter(research_dataset__icontains=old_org_sql)

logger.info(f"Changing organization id from: {old_org_id} to: {new_org_id} on {len(crs)} dataset(s)")
for cr in crs:
logger.info(f"Applying migration to catalog record: {cr}")

# Replace parent organization id and names in sub organizations with temporary values
tmp_rd = replace_sub_org_objs(cr.research_dataset, sub_org_pattern, tmp_replacements)

# Replace the old organization id and names with the new values
new_rd = replace_org(tmp_rd, old_org_value, new_org_value, old_en_name, new_en_name)

# Replace the temporary parent organization id and names in sub orgs with the original id and new "old" name
final_rd = replace_sub_org_objs(new_rd, sub_org_pattern, new_replacements)

cr.research_dataset = final_rd
cr.save()
logger.info(f"Applied migration to {len(crs)} catalog record(s)")


def revert(apps, schema_editor):
"""
Revert does not anything
"""
pass



class Migration(migrations.Migration):

dependencies = [
('metax_api', '0051_organization_update_luke'),
]

operations = [
migrations.RunPython(update_tuni_datasets, revert),
]
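
A sketch of the net effect of the three passes on hypothetical organization objects in a research_dataset:

# A plain reference to the old parent organization:
#   {"@type": "Organization", "identifier": ".../code/01905",
#    "name": {"en": "University of Tampere"}}
#   -> identifier becomes .../code/10122, English name "Tampere University".
#
# The same parent nested inside a ".../code/01905-..." sub-organization:
#   -> identifier stays .../code/01905, names become the historical
#      "University of Tampere (-2018)" / "Tampereen yliopisto (-2018)" /
#      "Tammerfors universitet (-2018)".
#
# Pass 1 stashes ids and names inside sub-org objects behind the temporary
# tmp-org-123-cba- values, pass 2 renames the remaining plain references,
# pass 3 restores the stashed values with the "(-2018)" names.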
11 changes: 9 additions & 2 deletions src/metax_api/models/catalog_record.py
@@ -149,14 +149,14 @@ class DatasetVersionSet(models.Model):

id = models.BigAutoField(primary_key=True, editable=False)

-    def get_listing(self):
+    def get_listing(self, only_published=True):
"""
Return a list of record preferred_identifiers that belong in the same dataset version chain.
Latest first.
+        If only_published is True, return only versions that are in published state.
"""
records = (
self.records(manager="objects_unfiltered")
-            .filter(state=CatalogRecord.STATE_PUBLISHED)
.order_by("-date_created")
.only(
"id",
Expand All @@ -168,6 +168,8 @@ def get_listing(self):
"removed",
)
)
+        if only_published:
+            records = records.filter(state=CatalogRecord.STATE_PUBLISHED)
return [r.version_dict for r in records]

def print_records(self): # pragma: no cover
@@ -3194,6 +3196,11 @@ def __call__(self):
cr_json, routing_key=self.routing_key, exchange="datasets"
)
if self.cr.catalog_publishes_to_ttv():
+            if self.cr.catalog_is_pas() and self.cr.preservation_state != self.cr.PRESERVATION_STATE_IN_PAS:
+                _logger.info(
+                    "Not publishing the catalog record to TTV."
+                    " The record is in the PAS catalog and its preservation state is not"
+                    f" {self.cr.PRESERVATION_STATE_IN_PAS}"
+                )
+                return

_logger.info(
"Publishing CatalogRecord %s to RabbitMQ... exchange: TTV-datasets, routing_key: %s"
19 changes: 18 additions & 1 deletion src/metax_api/models/catalog_record_v2.py
Original file line number Diff line number Diff line change
@@ -96,6 +96,12 @@ def delete(self, *args, **kwargs):
draft_of.next_draft = None
super(CatalogRecord, draft_of).save(update_fields=["next_draft"])

+        # To avoid duplicate key errors, 'forget' the information about the
+        # previous dataset version when a CR is deleted.
+        if self.state == self.STATE_PUBLISHED and self.previous_dataset_version:
+            self.previous_dataset_version = None
+            super().save()
+
super().delete(*args, **kwargs)

def _pre_create_operations(self):
@@ -1359,7 +1365,7 @@ def _create_new_dataset_version(self):

old_version = self

-        if old_version.next_dataset_version_id:
+        if not self._is_newest_non_removed_version():
raise Http400(
"Dataset already has a next version: %s"
% old_version.next_dataset_version.identifier
@@ -1446,6 +1452,17 @@ def _create_new_dataset_version(self):

self.add_post_request_callable(DelayedLog(**log_args))

+    def _is_newest_non_removed_version(self):
+        """
+        Check whether this dataset is the newest non-removed version in its version set.
+        If all versions are removed, return False.
+        """
+        versions = self.dataset_version_set.get_listing(only_published=False)
+        for version in versions:
+            if not version["removed"]:
+                return self.identifier == version["identifier"]
+        return False
+
def change_cumulative_state(self, new_state):
"""
Change field cumulative_state to new_state.
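Taken together, the widened get_listing and the new version check can be exercised with a sketch like this (assumes an existing CatalogRecord instance cr):

# version_dict entries carry at least "identifier" and "removed", latest first.
versions = cr.dataset_version_set.get_listing(only_published=False)
newest_alive = next((v for v in versions if not v["removed"]), None)
is_newest = newest_alive is not None and newest_alive["identifier"] == cr.identifier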
