Skip to content
This repository has been archived by the owner on Sep 16, 2022. It is now read-only.

Commit

Permalink
Merge branch 'test' into 'stable'
Browse files Browse the repository at this point in the history
22/03/Hotfix/2 Production Update

See merge request fairdata/fairdata-metax!164
  • Loading branch information
Toni Nurmi committed Mar 11, 2022
2 parents 5da1e54 + da70dc5 commit 14e514d
Show file tree
Hide file tree
Showing 19 changed files with 371 additions and 25 deletions.
4 changes: 2 additions & 2 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ clean_gitlab_env:
- echo "Cleaning deleted branches from environments"
rules:
- if: $CI_PIPELINE_SOURCE == "merge_request_event"
when: manual
- when: never
when: never
- when: manual

update_metax:
stage: update
Expand Down
28 changes: 28 additions & 0 deletions src/metax_api/api/rpc/base/views/statistic_rpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,3 +153,31 @@ def count_files(self, request):
}

return Response(StatisticService.count_files(**params))

@action(detail=False, methods=["get"], url_path="projects_summary")
def projects_summary(self, request):
    """
    Return the project summary produced by StatisticService.projects_summary.

    The "projects" query parameter optionally limits the summary to the listed
    projects. When the parameter is missing or empty, None is passed to the
    service instead of a list.
    """
    projects = None
    if request.query_params.get("projects"):
        projects = list(CS.get_list_query_param(request, "projects"))

    return Response(StatisticService.projects_summary(projects=projects))

@action(detail=False, methods=["get"], url_path="organizations_summary")
def organizations_summary(self, request):
    """
    Return the organization summary produced by
    StatisticService.organizations_summary.

    The "organizations" query parameter optionally limits the summary to the
    listed organizations. When the parameter is missing or empty, None is
    passed to the service instead of a list.
    """
    organizations = None
    if request.query_params.get("organizations"):
        organizations = list(CS.get_list_query_param(request, "organizations"))

    return Response(StatisticService.organizations_summary(organizations=organizations))
47 changes: 47 additions & 0 deletions src/metax_api/management/commands/create_statistic_report.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import logging

from django.core.management.base import BaseCommand
from django.db.models import Sum
from django.db.models.expressions import RawSQL

from metax_api.models import File, CatalogRecordV2, OrganizationStatistics, ProjectStatistics
from metax_api.api.rest.base.views import FileViewSet
from metax_api.services import FileService, StatisticService

logger = logging.getLogger(__name__)

class Command(BaseCommand):
    """
    Management command that rebuilds the statistic summary tables.

    All existing ProjectStatistics and OrganizationStatistics rows are removed
    and then recreated: one row per distinct File.project_identifier and one
    row per distinct CatalogRecordV2.metadata_provider_org.
    """

    def handle(self, *args, **options):
        """
        Delete the old summaries and recompute them inside one transaction.

        Any exception raised while the summaries are being rebuilt propagates
        to the caller after the transaction has been rolled back.
        """
        # Imported here so this command does not depend on a change to the
        # module-level import list.
        from django.db import transaction

        logger.info("Creating statistic summary")

        # Deleting and recreating in a single transaction means a failure
        # midway rolls everything back instead of leaving the summary tables
        # empty or half-filled.
        with transaction.atomic():
            OrganizationStatistics.objects.all().delete()
            ProjectStatistics.objects.all().delete()

            self._create_project_statistics()
            self._create_organization_statistics()

        logger.info("Statistic summary created")

    def _create_project_statistics(self):
        """Save one ProjectStatistics row for every distinct project identifier of File."""
        # order_by() clears any default ordering; ordering columns are added to
        # the SELECT and could otherwise make distinct() return the same
        # project more than once (same pattern as the organization query).
        ida_projects = File.objects.all().order_by().values("project_identifier").distinct()

        for project in ida_projects:
            project_id = project["project_identifier"]
            # With include_pids=True count_files returns (aggregates, file identifiers)
            totals, file_pids = StatisticService.count_files([project_id], include_pids=True)

            if file_pids:
                catalog_records = FileService.get_identifiers(
                    file_pids, "noparams", True, get_pids=True
                ).data
            else:
                # No files, so there is nothing to look up
                catalog_records = ""

            ProjectStatistics(
                project_identifier=project_id,
                count=totals["count"],
                byte_size=totals["byte_size"],
                published_datasets=catalog_records,
            ).save()

    def _create_organization_statistics(self):
        """Save one OrganizationStatistics row for every distinct metadata provider organization."""
        organizations = (
            CatalogRecordV2.objects.all().order_by().values("metadata_provider_org").distinct()
        )

        for org in organizations:
            org_id = org["metadata_provider_org"]
            totals = StatisticService.count_datasets(metadata_provider_org=org_id)
            OrganizationStatistics(
                organization=org_id,
                count=totals["count"],
                byte_size=totals["ida_byte_size"],
            ).save()
19 changes: 14 additions & 5 deletions src/metax_api/management/commands/update_orgs.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,10 @@ def get_orgs_from_api() -> List[Organization]:
orgs.append(o)

o = Organization(name_fi, name_en, org_code)
if name_sv:
o.org_name_sv = name_sv
orgs.append(o)

logger.info(f"retrieved {len(orgs)} organizations from research.fi")
return orgs

Expand Down Expand Up @@ -153,9 +156,12 @@ def handle(self, *args, **options):
added += 1
logger.info(f"Added {added} organisations from research.fi to local org list")

# sort orgs alphabetically
s = sorted(union, key=lambda i: (i.org_name_fi, i.unit_name))
with open(settings.ORG_FILE_PATH, "w") as f:
# write new orgs to local csv
with open(settings.ORG_FILE_PATH, "w", newline='') as f:
logger.info("writing updated csv")
# remove duplicates
no_duplicates = []
for c in s:
if c not in no_duplicates:
Expand All @@ -165,6 +171,7 @@ def handle(self, *args, **options):
writer = csv.DictWriter(
f,
fieldnames=CSV_HEADERS,
lineterminator='\n',
)
writer.writeheader()
for i in csv_serialized:
Expand All @@ -174,9 +181,11 @@ def handle(self, *args, **options):
# Malformed values from TTV api
if i["unit_name"] == "LÄÄKETIETEELLINEN TIEDEKUNTA":
continue
if "Ã…bo" in i["unit_name"]:
i["unit_name"] = str(i["unit_name"]).replace("Ã…bo", "Åbo")
if "ÖS" in i["unit_name"]:
i["unit_name"] = str(i["unit_name"]).replace("ÖS", "Ö")
if "Ã…" in i["unit_name"]:
i["unit_name"] = str(i["unit_name"]).replace("Ã…", "Å")
if "Ö" in i["unit_name"]:
i["unit_name"] = str(i["unit_name"]).replace("Ö", "Ö")
if "Ä" in i["unit_name"]:
i["unit_name"] = str(i["unit_name"]).replace("Ä", "Ä")
writer.writerow(i)
logger.info("successfully updated organization csv")
55 changes: 55 additions & 0 deletions src/metax_api/migrations/0047_update_vrk_datasets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
from django.db import migrations

import logging

logger = logging.getLogger(__name__)

def update_vrk_datasets(apps, schema_editor):
    """
    Data migration: rename the creator organization of the catalog records by
    Väestörekisterikeskus and mention the previous name in their descriptions.

    The name of the first creator is overwritten in English, Finnish and
    Swedish. The English and Finnish descriptions get a suffix naming the old
    organization, unless the suffix is already present.
    """
    logger.info("Updating organization info of catalog records by Väestörekisterikeskus")

    # New organization name per language
    new_names = {
        "en": "Digital and Population Data Services Agency",
        "fi": "Digi- ja väestötietovirasto",
        "sv": "Myndigheten för digitalisering och befolkningsdata",
    }

    # Text appended to the description per language
    description_suffixes = {
        "en": "\n\nPrevious name of dataset creator organization: Population Register Center.",
        "fi": "\n\nAineiston luojaorganisaation aikaisempi nimi: Väestörekisterikeskus.",
    }

    # Catalog Records by Väestörekisterikeskus. Finding them with Django
    # filters would have been too complicated, so the identifiers are hardcoded.
    vrk_identifiers = [
        "a3610de8-73fa-4e25-a89b-320549c71f0a",
        "b77c91cf-a437-4d01-b2ec-efb08605d559",
        "7787c312-3973-4e16-a032-7b89a0257739",
    ]

    CatalogRecord = apps.get_model('metax_api', 'CatalogRecord')
    records = CatalogRecord.objects.filter(identifier__in=vrk_identifiers)
    logger.info(f"Found {len(records)} catalog records to update")

    for record in records:
        dataset = record.research_dataset
        logger.info(f"Updating catalog record: {record}")

        creator_name = dataset["creator"][0]["name"]
        for lang, name in new_names.items():
            creator_name[lang] = name

        descriptions = dataset["description"]
        for lang, suffix in description_suffixes.items():
            # Only append once, so re-running does not duplicate the suffix
            if suffix not in descriptions[lang]:
                descriptions[lang] += suffix

        record.save()


def revert(apps, schema_editor):
    """No-op reverse step: the organization info changes are not undone."""
    return None



class Migration(migrations.Migration):
    """Data migration that runs update_vrk_datasets; its reverse step is revert."""

    # Must be applied after the previous metax_api migration
    dependencies = [
        ("metax_api", "0046_replace_dataset_owner"),
    ]

    operations = [
        migrations.RunPython(update_vrk_datasets, revert),
    ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Generated by Django 3.2.10 on 2021-12-31 07:43

from django.db import migrations, models


class Migration(migrations.Migration):
    """Create the OrganizationStatistics and ProjectStatistics models."""

    dependencies = [
        ("metax_api", "0047_update_vrk_datasets"),
    ]

    operations = [
        # One row per organization, keyed by the organization itself
        migrations.CreateModel(
            name="OrganizationStatistics",
            fields=[
                ("organization", models.CharField(max_length=200, primary_key=True, serialize=False)),
                ("count", models.IntegerField()),
                ("byte_size", models.IntegerField()),
            ],
        ),
        # One row per project, keyed by the project identifier
        migrations.CreateModel(
            name="ProjectStatistics",
            fields=[
                ("project_identifier", models.CharField(max_length=200, primary_key=True, serialize=False)),
                ("count", models.IntegerField()),
                ("byte_size", models.IntegerField()),
                ("published_datasets", models.TextField()),
            ],
        ),
    ]
2 changes: 2 additions & 0 deletions src/metax_api/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,6 @@
from .file import File
from .file_storage import FileStorage
from .metax_user import MetaxUser
from .organization_statistics import OrganizationStatistics
from .project_statistics import ProjectStatistics
from .xml_metadata import XmlMetadata
9 changes: 6 additions & 3 deletions src/metax_api/models/catalog_record.py
Original file line number Diff line number Diff line change
Expand Up @@ -611,7 +611,7 @@ def user_has_access(self, request):
raise Http404

# write operation
return self.user_is_owner(request)
return self.user_is_privileged(request)

def user_is_owner(self, request):
if self.state == self.STATE_DRAFT and self.metadata_provider_user != request.user.username:
Expand Down Expand Up @@ -648,11 +648,14 @@ def user_is_privileged(self, request):
return True
else:
return True

users = self.editor_permissions.users
ids = users.all().values_list('user_id', flat=True)
if request.user.username in ids:
return True
elif self.user_is_owner(request):
# can see sensitive fields
return True
else:
# unknown user
return False

def _check_catalog_permissions(self, catalog_groups, catalog_services, request=None):
Expand Down
18 changes: 18 additions & 0 deletions src/metax_api/models/organization_statistics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# This file is part of the Metax API service
#
# Copyright 2017-2018 Ministry of Education and Culture, Finland
#
# :author: CSC - IT Center for Science Ltd., Espoo Finland <[email protected]>
# :license: MIT

import logging

from django.db import models

_logger = logging.getLogger(__name__)

class OrganizationStatistics(models.Model):
    """Statistic summary row for one organization; the organization is the primary key."""

    organization = models.CharField(max_length=200, primary_key=True)
    count = models.IntegerField()
    # NOTE(review): Django only guarantees IntegerField for values up to
    # 2147483647; a total byte size can plausibly exceed that. Consider
    # BigIntegerField together with a schema migration.
    byte_size = models.IntegerField()

18 changes: 18 additions & 0 deletions src/metax_api/models/project_statistics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# This file is part of the Metax API service
#
# Copyright 2017-2018 Ministry of Education and Culture, Finland
#
# :author: CSC - IT Center for Science Ltd., Espoo Finland <[email protected]>
# :license: MIT

import logging

from django.db import models

_logger = logging.getLogger(__name__)

class ProjectStatistics(models.Model):
    """Statistic summary row for one project; the project identifier is the primary key."""

    project_identifier = models.CharField(max_length=200, primary_key=True)
    count = models.IntegerField()
    # NOTE(review): Django only guarantees IntegerField for values up to
    # 2147483647; a total byte size can plausibly exceed that. Consider
    # BigIntegerField together with a schema migration.
    byte_size = models.IntegerField()
    # Free-text field; presumably holds the identifiers of the project's
    # published datasets (verify against the code that fills it)
    published_datasets = models.TextField()
1 change: 1 addition & 0 deletions src/metax_api/onappstart.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

from metax_api.utils import ReferenceDataLoader, executing_test_case, convert_yaml_to_html


_logger = logging.getLogger(__name__)


Expand Down
9 changes: 7 additions & 2 deletions src/metax_api/services/file_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ def restore_files(cls, request, file_identifier_list):
return Response({"restored_files_count": affected_rows}, status=status.HTTP_200_OK)

@classmethod
def get_identifiers(cls, identifiers, params, keysonly):
def get_identifiers(cls, identifiers, params, keysonly, get_pids = False):
"""
keys='files': Find out which (non-deprecated) datasets a list of files belongs to, and return
their preferred_identifiers per file as a list in json format.
Expand All @@ -243,6 +243,8 @@ def get_identifiers(cls, identifiers, params, keysonly):
keysonly= for dataset return dataset ids that have files, for files return file ids that belong
to some dataset
get_pids: get preferred identifiers instead of identifiers. Only applicable, if params = "noparams"
Parameter identifiers can be a list of pk's (integers), or file/dataset identifiers (strings).
"""
_logger.info("Retrieving detailed list of %s" % params)
Expand All @@ -253,7 +255,7 @@ def get_identifiers(cls, identifiers, params, keysonly):

_logger.info(
"Searching return for the following %s (printing first 10):\n%s"
% (params, "\n".join(str(id) for id in ids[:10]))
% (params, ", ".join(str(id) for id in ids[:10]))
)

noparams = """
Expand All @@ -266,6 +268,9 @@ def get_identifiers(cls, identifiers, params, keysonly):
GROUP BY cr.identifier
"""

if get_pids:
noparams = noparams.replace("cr.identifier", "research_dataset->>'preferred_identifier'")

files = """
SELECT f.identifier, json_agg(cr.identifier)
FROM metax_api_file f
Expand Down
27 changes: 25 additions & 2 deletions src/metax_api/services/statistic_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from django.db.models.functions import Coalesce

from metax_api.exceptions import Http400
from metax_api.models import CatalogRecord, DataCatalog, File
from metax_api.models import CatalogRecord, DataCatalog, File, ProjectStatistics, OrganizationStatistics

_logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -616,7 +616,7 @@ def unused_files(cls):
return file_stats

@classmethod
def count_files(cls, projects, removed=None):
def count_files(cls, projects, removed=None, include_pids=False):
kwargs = OrderedDict()
file_query = File.objects_unfiltered.all()

Expand All @@ -630,4 +630,27 @@ def count_files(cls, projects, removed=None):
.distinct()

# Coalesce is required to provide a default value
if include_pids:
return file_query.aggregate(count=Count("id"), byte_size=Coalesce(Sum("byte_size"), 0)), list(file_query.values_list('identifier', flat=True))
return file_query.aggregate(count=Count("id"), byte_size=Coalesce(Sum("byte_size"), 0))

@classmethod
def projects_summary(cls, projects):
    """
    Return the stored ProjectStatistics rows as dictionaries.

    projects: list of project identifiers to include, or None to include all.

    When no rows match, a message string is returned instead of the rows.
    """
    if projects is None:
        rows = ProjectStatistics.objects.all().values()
    else:
        rows = ProjectStatistics.objects.all().filter(project_identifier__in=projects).values()

    if rows:
        return rows
    return f"No projects found with project_identifier: {projects}"

@classmethod
def organizations_summary(cls, organizations):
    """
    Return the stored OrganizationStatistics rows as dictionaries.

    organizations: list of organizations to include, or None to include all.

    When no rows match, a message string is returned instead of the rows.
    """
    if organizations is None:
        rows = OrganizationStatistics.objects.all().values()
    else:
        rows = OrganizationStatistics.objects.all().filter(organization__in=organizations).values()

    if rows:
        return rows
    return f"No organizations found with organization_identifier: {organizations}"

2 changes: 2 additions & 0 deletions src/metax_api/settings/components/access_control.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,8 @@ def __lt__(self, other):
api_permissions.rpc.statistics.harvested_datasets_cumulative.use = [Role.ALL]
api_permissions.rpc.statistics.organization_datasets_cumulative.use = [Role.ALL]
api_permissions.rpc.statistics.unused_files.use = [Role.ALL]
api_permissions.rpc.statistics.projects_summary.use = [Role.ALL]
api_permissions.rpc.statistics.organizations_summary.use = [Role.ALL]


def prepare_perm_values(d):
Expand Down
Loading

0 comments on commit 14e514d

Please sign in to comment.