This repository has been archived by the owner on Sep 16, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
tonurmi
committed
Jun 6, 2022
1 parent
0044867
commit 21c2cf9
Showing
16 changed files
with
836 additions
and
300 deletions.
There are no files selected for viewing
31 changes: 31 additions & 0 deletions
31
src/metax_api/management/commands/delete_removed_datasets.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
import logging | ||
|
||
from django.core.management.base import BaseCommand | ||
|
||
from metax_api.models import DataCatalog, CatalogRecord | ||
from django.forms.models import model_to_dict | ||
logger = logging.getLogger(__name__) | ||
|
||
|
||
class Command(BaseCommand):
    """Hard delete catalog records that are flagged as removed in one data catalog.

    Positional argument ``data_catalog_identifier`` selects the data catalog.
    The optional ``--del-limit`` caps how many records are deleted in one run;
    when it is omitted, every removed record of the catalog is deleted.
    """

    def handle(self, *args, **options):
        """Look up the catalog, then hard delete its removed records.

        Raises whatever ``DataCatalog.objects.get`` raises when no catalog
        matches the given identifier.
        """
        logger.info(f"{options=}")
        data_catalog = DataCatalog.objects.get(
            catalog_json__identifier=options["data_catalog_identifier"]
        )
        # argparse stores None when --del-limit is not given on the command line.
        del_limit = options.get("del_limit")
        crs = CatalogRecord.objects_unfiltered.filter(data_catalog=data_catalog, removed=True)
        logger.info(f"found {crs.count()} removed datasets")
        if del_limit is None:
            logger.info("No deletion limit given, will delete all removed datasets")
        else:
            logger.info(f"Will delete {del_limit} datasets at most")

        deleted = 0
        for cr in crs:
            # Check the limit before deleting so that a limit of 0 deletes
            # nothing, and so that a missing limit is never compared to an int.
            if del_limit is not None and deleted >= del_limit:
                break
            logger.info(f"deleting CatalogRecord: {model_to_dict(cr)}")
            cr.delete(hard=True)
            deleted += 1

        logger.info(f"hard deleted {deleted} datasets")

    def add_arguments(self, parser):
        """Register the catalog identifier and the optional deletion limit."""
        parser.add_argument(
            "data_catalog_identifier",
            type=str,
            help="Identifier of the data catalog where the datasets are deleted",
        )
        parser.add_argument(
            "--del-limit",
            type=int,
            default=None,
            help="Max number of datasets to delete",
        )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
33 changes: 33 additions & 0 deletions
33
src/metax_api/migrations/0050_remove_previous_dataset_version_from_deleted_datasets.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
from django.db import migrations | ||
from metax_api.models import CatalogRecord as CRM | ||
|
||
import logging | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
def remove_previous_dataset_version_from_deleted_datasets(apps, schema_editor):
    """Clear ``previous_dataset_version`` on published catalog records flagged as removed.

    Only records that currently have a non-null ``previous_dataset_version``
    are selected. Each one is saved individually through the historical model
    obtained from ``apps``.
    """
    logger.info("")
    logger.info("Removing previous dataset versions information from deleted datasets")

    catalog_record_model = apps.get_model('metax_api', 'CatalogRecord')
    selection = {
        "state": CRM.STATE_PUBLISHED,
        "previous_dataset_version__isnull": False,
        "removed": True,
    }
    records = catalog_record_model.objects.filter(**selection)

    processed = 0
    for record in records:
        logger.info(f"Applying migration to catalog record: {record}")
        record.previous_dataset_version = None
        record.save()
        processed += 1

    logger.info(f"Applied migration to {processed} catalog record(s)")
|
||
def revert(apps, schema_editor):
    """Reverse step of the data migration; intentionally does nothing."""
    return None
|
||
|
||
|
||
class Migration(migrations.Migration):
    """Run the previous-dataset-version cleanup as a data migration."""

    dependencies = [("metax_api", "0049_auto_20220413_0952")]

    operations = [
        migrations.RunPython(
            code=remove_previous_dataset_version_from_deleted_datasets,
            reverse_code=revert,
        ),
    ]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
from django.db import migrations | ||
from metax_api.models import CatalogRecord as CRM | ||
|
||
import json | ||
|
||
import logging | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
def replace_org(obj, old_org_id, new_org_id):
    """Return a copy of ``obj`` with the old organization URI swapped for the new one.

    ``obj`` is round-tripped through JSON, and every string that is a direct
    value of a JSON object has occurrences of the old organization URI
    replaced (substring replacement). Strings that are list elements and
    non-string values are left untouched. The input object is not modified.
    """
    code_base = "http://uri.suomi.fi/codelist/fairdata/organization/code/"
    stale_uri = f"{code_base}{old_org_id}"
    fresh_uri = f"{code_base}{new_org_id}"

    def rewrite_strings(mapping):
        # Called by json.loads for every decoded JSON object, innermost first.
        for field in mapping:
            content = mapping[field]
            if isinstance(content, str):
                mapping[field] = content.replace(stale_uri, fresh_uri)
        return mapping

    serialized = json.dumps(obj)
    return json.loads(serialized, object_hook=rewrite_strings)
|
||
|
||
def update_luke_datasets(apps, schema_editor):
    """Rewrite the organization code 02446292 to 4100010 in matching catalog records.

    Records are selected with a case-insensitive text match of the old
    organization identifier against ``research_dataset``. Each match gets its
    ``research_dataset`` rebuilt by ``replace_org`` and is saved individually.
    """
    logger.info("")

    old_org_id = "02446292"
    new_org_id = "4100010"
    # Text fragment searched for inside research_dataset.
    old_org_obj = (
        '"identifier": '
        f'"http://uri.suomi.fi/codelist/fairdata/organization/code/{old_org_id}"'
    )

    catalog_record_model = apps.get_model('metax_api', 'CatalogRecord')
    matches = catalog_record_model.objects.filter(research_dataset__icontains=old_org_obj)
    total = len(matches)

    logger.info(f"Changing organization id from: {old_org_id} to: {new_org_id} on {total} dataset(s)")
    for record in matches:
        logger.info(f"Applying migration to catalog record: {record}")
        record.research_dataset = replace_org(record.research_dataset, old_org_id, new_org_id)
        record.save()
    logger.info(f"Applied migration to {total} catalog record(s)")
|
||
def revert(apps, schema_editor):
    """Reverse step of the data migration; intentionally does nothing."""
    return None
|
||
|
||
|
||
class Migration(migrations.Migration):
    """Run the Luke organization id update as a data migration."""

    dependencies = [
        ("metax_api", "0050_remove_previous_dataset_version_from_deleted_datasets"),
    ]

    operations = [
        migrations.RunPython(code=update_luke_datasets, reverse_code=revert),
    ]
128 changes: 128 additions & 0 deletions
128
src/metax_api/migrations/0052_organization_update_tuni.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
from django.db import migrations | ||
from metax_api.models import CatalogRecord as CRM | ||
|
||
import json | ||
|
||
import logging | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
def is_obj_sub_org(obj, old_sub_org_value):
    """Tell whether ``obj`` is an Organization dict that mentions ``old_sub_org_value``.

    Returns True only when ``obj`` is a dict whose ``"@type"`` is
    ``"Organization"`` and whose string representation contains
    ``old_sub_org_value``. Anything that is not a dict (lists, strings,
    numbers, None) yields False instead of raising ``AttributeError``.
    """
    if not isinstance(obj, dict):
        return False
    if obj.get("@type") != "Organization":
        return False
    # The search covers the whole nested object, not only its own identifier.
    return old_sub_org_value in str(obj)
|
||
|
||
def replace_sub_org_objs(obj, sub_org_pattern, replacements):
    """Return a copy of ``obj`` with replacements applied inside sub-organization objects.

    ``obj`` is round-tripped through JSON. For every JSON object, each value
    that ``is_obj_sub_org`` accepts is serialized to text, has every
    ``[old, new]`` pair from ``replacements`` applied, and is parsed back. A
    closing double quote is appended to both sides of each pair, so only text
    that ends a JSON string is replaced.

    NOTE(review): only values stored directly under a key are inspected. An
    organization that is an element of a list is never itself passed to the
    check; confirm that this is intended.
    """

    def patch_sub_orgs(mapping):
        for field, candidate in mapping.items():
            try:
                if not is_obj_sub_org(candidate, sub_org_pattern):
                    continue
                serialized = json.dumps(candidate)
                for pair in replacements:
                    serialized = serialized.replace(pair[0] + '"', pair[1] + '"')
                mapping[field] = json.loads(serialized)
            except AttributeError:
                # Values without dict-like access are simply skipped.
                pass
        return mapping

    return json.loads(json.dumps(obj), object_hook=patch_sub_orgs)
|
||
|
||
|
||
def replace_org(obj, old_org_value, new_org_value, old_en_name, new_en_name):
    """Return a copy of ``obj`` with exact matches of the old name or old org value replaced.

    ``obj`` is round-tripped through JSON. A value stored directly under a key
    is changed only when it equals ``old_en_name`` or ``old_org_value`` in
    full, so longer strings that merely contain them are left alone. The input
    object is not modified.
    """

    def swap_exact_matches(mapping):
        for field in mapping:
            current = mapping[field]
            try:
                # Exact comparison guards against touching unrelated fields.
                if current == old_en_name:
                    mapping[field] = current.replace(old_en_name, new_en_name)
                elif current == old_org_value:
                    mapping[field] = current.replace(old_org_value, new_org_value)
            except AttributeError:
                pass
        return mapping

    serialized = json.dumps(obj)
    return json.loads(serialized, object_hook=swap_exact_matches)
|
||
|
||
def update_tuni_datasets(apps, schema_editor):
    """Move catalog records from organization code 01905 to 10122.

    Records are selected with a case-insensitive text match of the old
    organization identifier against ``research_dataset``. Each one is
    rewritten in three passes:

    1. Inside sub-organization objects, the parent id and names are swapped
       for temporary placeholders.
    2. Remaining exact matches of the old id and old English name are replaced
       with the new id and new English name.
    3. Inside sub-organization objects, the placeholders are swapped for the
       original id and the "(-2018)" variants of the names.
    """
    logger.info("")

    # Organization codes and the URI they are embedded in.
    url_prefix = "http://uri.suomi.fi/codelist/fairdata/organization/code/"
    old_org_id = "01905"
    new_org_id = "10122"
    tmp_org_prefix = "tmp-org-123-cba-"

    old_org_value = f"{url_prefix}{old_org_id}"
    new_org_value = f"{url_prefix}{new_org_id}"
    tmp_org_value = f"{url_prefix}{tmp_org_prefix}{old_org_id}"
    sub_org_pattern = f"{url_prefix}{old_org_id}-"
    old_org_sql = f'"identifier": "{url_prefix}{old_org_id}"'

    # English names; only the English name differs for the new organization.
    old_en_name = "University of Tampere"
    new_en_name = "Tampere University"
    new_old_en_name = "University of Tampere (-2018)"
    tmp_en_name = "Temporary organization name 123-cba-en"

    # Finnish names.
    old_fi_name = "Tampereen yliopisto"
    new_old_fi_name = "Tampereen yliopisto (-2018)"
    tmp_fi_name = "Temporary organization name 123-cba-fi"

    # Swedish names.
    old_sv_name = "Tammerfors universitet"
    new_old_sv_name = "Tammerfors universitet (-2018)"
    tmp_sv_name = "Temporary organization name 123-cba-sv"

    tmp_replacements = [
        [old_org_value, tmp_org_value],
        [old_en_name, tmp_en_name],
        [old_fi_name, tmp_fi_name],
        [old_sv_name, tmp_sv_name],
    ]
    new_replacements = [
        [tmp_org_value, old_org_value],
        [tmp_en_name, new_old_en_name],
        [tmp_fi_name, new_old_fi_name],
        [tmp_sv_name, new_old_sv_name],
    ]

    catalog_record_model = apps.get_model('metax_api', 'CatalogRecord')
    matches = catalog_record_model.objects.filter(research_dataset__icontains=old_org_sql)
    total = len(matches)

    logger.info(f"Changing organization id from: {old_org_id} to: {new_org_id} on {total} dataset(s)")
    for record in matches:
        logger.info(f"Applying migration to catalog record: {record}")

        # Pass 1: shield the parent id and names inside sub organizations.
        shielded = replace_sub_org_objs(record.research_dataset, sub_org_pattern, tmp_replacements)

        # Pass 2: switch the old organization id and name to the new values.
        switched = replace_org(shielded, old_org_value, new_org_value, old_en_name, new_en_name)

        # Pass 3: restore the original id and apply the new "old" names in sub orgs.
        record.research_dataset = replace_sub_org_objs(switched, sub_org_pattern, new_replacements)
        record.save()
    logger.info(f"Applied migration to {total} catalog record(s)")
|
||
|
||
def revert(apps, schema_editor):
    """Reverse step of the data migration; it does not do anything."""
    return None
|
||
|
||
|
||
class Migration(migrations.Migration):
    """Run the Tampere University organization update as a data migration."""

    dependencies = [("metax_api", "0051_organization_update_luke")]

    operations = [
        migrations.RunPython(code=update_tuni_datasets, reverse_code=revert),
    ]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.