This repository has been archived by the owner on Sep 16, 2022. It is now read-only.

Commit

Merge branch 'stable' into 'master'
Production Update 22/04/01

See merge request fairdata/fairdata-metax!179
Toni Nurmi committed Apr 4, 2022
2 parents 2a398b4 + 79c5b8b commit d719ec4
Showing 20 changed files with 383 additions and 113 deletions.
108 changes: 80 additions & 28 deletions .gitlab-ci.yml
@@ -4,36 +4,55 @@ workflow:
- if: $CI_PIPELINE_SOURCE == "merge_request_event" && $CI_MERGE_REQUEST_SOURCE_BRANCH_NAME !~ /^(demo|master|stable|staging|test)$/

stages:
- clean_build
- deploy
- test
- clean_test
- clean_env
- update
- clean_build

deploy:
stage: deploy
environment: $CI_COMMIT_REF_NAME
script:
- ansible-playbook -i $ANSIBLE_INVENTORY $DEPLOY_PLAYBOOK -e "build_id=$CI_COMMIT_REF_SLUG repo_version=$CI_COMMIT_REF_NAME"
- clean_env

integration_test:
stage: test
tags:
- docker
image:
name: python:3.8
services:
- postgres:9.6
- rabbitmq:3
- redis:latest
- name: elasticsearch:7.9.2
alias: elasticsearch
command: [ "bin/elasticsearch", "-Expack.security.enabled=false", "-Ediscovery.type=single-node" ]
variables:
POSTGRES_USER: metax_user
POSTGRES_PASSWORD: password
POSTGRES_DB: metax_db
METAX_DATABASE: metax_db
METAX_DATABASE_PASSWORD: password
METAX_DATABASE_USER: metax_user
METAX_DATABASE_HOST: postgres
ELASTIC_SEARCH_HOSTS: elasticsearch
RABBIT_MQ_HOSTS: rabbitmq
REDIS_HOST: redis
REDIS_PORT: 6379
DJANGO_SECRET_KEY: django-insecure-22388&4#2_#u0e%$%!)5vo3mpys)#f7@vsk9az354!jgze--&e
DJANGO_ENV: unittests
environment: $CI_COMMIT_REF_NAME
script:
- ansible-playbook -i $ANSIBLE_INVENTORY $TEST_PLAYBOOK -e "build_id=$CI_COMMIT_REF_SLUG repo_version=$CI_COMMIT_REF_NAME"

clean_test:
stage: clean_test
environment:
name: $CI_COMMIT_REF_NAME
on_stop: clean_gitlab_env
script:
- ansible-playbook -i $ANSIBLE_INVENTORY $DELETE_PLAYBOOK -e "build_id=$CI_COMMIT_REF_SLUG repo_version=$CI_COMMIT_REF_NAME"
rules:
- if: $CI_PIPELINE_SOURCE == "merge_request_event"
when: always
- when: never
- apt-get update && apt-get install -y xqilla libxerces-c-dev build-essential libssl-dev libffi-dev python-dev libxqilla-dev
- mkdir -p /var/log/metax-api/errors
- pip install -r requirements.txt
- cd src
- python manage.py first_time_setup
- coverage run manage.py test --parallel
- coverage combine
- coverage report -m
- coverage xml
artifacts:
reports:
cobertura: src/coverage.xml
paths:
- src/coverage.xml

clean_gitlab_env:
stage: clean_env
@@ -45,16 +64,25 @@ clean_gitlab_env:
script:
- echo "Cleaning deleted branches from environments"
rules:
- if: $CI_PIPELINE_SOURCE == "merge_request_event"
- if: $CI_PIPELINE_SOURCE == "merge_request_event" && $CI_MERGE_REQUEST_SOURCE_BRANCH_NAME !~ /^(demo|master|stable|staging|test)$/
when: never
- when: manual

deploy:
stage: deploy
environment: $CI_COMMIT_REF_NAME
script:
- ansible-playbook -i $ANSIBLE_INVENTORY $DEPLOY_PLAYBOOK --vault-id $ANSIBLE_VAULT_FILE -e "build_id=$CI_COMMIT_REF_SLUG repo_version=$CI_COMMIT_REF_NAME"
rules:
- if: $CI_COMMIT_BRANCH =~ /^(demo|master|stable|staging|test)$/
when: always

update_metax:
stage: update
environment: $CI_COMMIT_REF_NAME
script:
- ansible-playbook -i $ANSIBLE_INVENTORY $UPDATE_PROXY_PLAYBOOK -e "build_id=$CI_COMMIT_REF_SLUG repo_version=$CI_COMMIT_REF_NAME"
- ansible-playbook -i $ANSIBLE_INVENTORY $MANAGE_PLAYBOOK -e "build_id=$CI_COMMIT_REF_SLUG repo_version=$CI_COMMIT_REF_NAME"
- ansible-playbook -i $ANSIBLE_INVENTORY $UPDATE_PROXY_PLAYBOOK --vault-id $ANSIBLE_VAULT_FILE -e "build_id=$CI_COMMIT_REF_SLUG repo_version=$CI_COMMIT_REF_NAME"
- ansible-playbook -i $ANSIBLE_INVENTORY $MANAGE_PLAYBOOK --vault-id $ANSIBLE_VAULT_FILE -e "build_id=$CI_COMMIT_REF_SLUG repo_version=$CI_COMMIT_REF_NAME"
rules:
- if: $CI_COMMIT_BRANCH =~ /^(demo|stable|staging|test)$/
when: always
@@ -64,9 +92,33 @@ clean_previous_build:
stage: clean_build
environment: $CI_COMMIT_REF_NAME
script:
- ansible-playbook -i $ANSIBLE_INVENTORY $DELETE_PLAYBOOK -e "build_id=$CI_COMMIT_REF_SLUG repo_version=$CI_COMMIT_REF_NAME"
- ansible-playbook -i $ANSIBLE_INVENTORY $DELETE_PLAYBOOK --vault-id $ANSIBLE_VAULT_FILE -e "build_id=$CI_COMMIT_REF_SLUG repo_version=$CI_COMMIT_REF_NAME"
rules:
- if: $CI_COMMIT_BRANCH =~ /^(staging|test|stable)$/
when: never
when: manual
when: always

sonarqube-check:
tags:
- docker
stage: test
image:
name: sonarsource/sonar-scanner-cli:latest
entrypoint: [""]
variables:
SONAR_USER_HOME: "${CI_PROJECT_DIR}/.sonar" # Defines the location of the analysis task cache
GIT_DEPTH: "0" # Tells git to fetch all the branches of the project, required by the analysis task
cache:
key: "${CI_JOB_NAME}"
paths:
- .sonar/cache
script:
- sonar-scanner
allow_failure: true
needs:
- job: integration_test
artifacts: true
only:
- merge_requests
- master
- test
- staging
2 changes: 2 additions & 0 deletions README.md
@@ -1,3 +1,5 @@
# Metax API

This repository contains the code for Metax API service.

## License
9 changes: 9 additions & 0 deletions sonar-project.properties
@@ -0,0 +1,9 @@
sonar.projectKey=fairdata_fairdata-metax_AX-2XrAIExyDtAUSxNVt
sonar.qualitygate.wait=true
# Scan settings.
#sonar.projectBaseDir=.
# Define the directories that should be scanned. Comma separated.
sonar.sources=src/metax_api/
sonar.exclusions=src/metax_api/tests/**/*
sonar.tests=src/metax_api/tests/
sonar.python.coverage.reportPaths=src/coverage*.xml
7 changes: 6 additions & 1 deletion src/.coveragerc
@@ -10,6 +10,8 @@ omit =
*/urls.py
*/router.py
metax_api/onappstart.py
*/site-packages/*

branch = True
concurrency = multiprocessing
data_file = coverage_data/coverage.db
@@ -29,6 +31,8 @@ exclude_lines =
raise NotImplementedError
if 0:
if __name__ == .__main__.:


skip_covered = True
omit =
*/migrations/*
@@ -40,4 +44,5 @@ omit =
*/manage.py
*/urls.py
*/router.py
metax_api/onappstart.py
metax_api/onappstart.py
*/site-packages/*
@@ -254,6 +254,29 @@ def _check_end_user_allowed_catalogs(self, dc_identifier):
}
)

def _filter_research_dataset_fields(self, res):
"""
If research_dataset_fields query parameter is supplied, return only
requested fields from research_dataset.
"""
if (
"research_dataset" in res
and "view" in self.context
and "research_dataset_fields" in self.context["view"].request.query_params
):
research_dataset_fields = set(
self.context["view"]
.request.query_params.get("research_dataset_fields", "")
.split(",")
)
research_dataset = {
key: value
for (key, value) in res["research_dataset"].items()
if key in research_dataset_fields
}
return {**res, "research_dataset": research_dataset}
return res

def to_representation(self, instance):
res = super(CatalogRecordSerializer, self).to_representation(instance)

@@ -281,7 +304,14 @@ def to_representation(self, instance):
res["alternate_record_set"] = [ar.identifier for ar in alternate_records]

if "dataset_version_set" in res:
res["dataset_version_set"] = instance.dataset_version_set.get_listing()
# avoid querying records when there are no other datasets in dataset_version_set
if (
hasattr(instance, "dataset_version_set__records__count")
and instance.dataset_version_set__records__count == 1
):
res["dataset_version_set"] = [instance.version_dict]
else:
res["dataset_version_set"] = instance.dataset_version_set.get_listing()

if "next_dataset_version" in res:
if instance.next_dataset_version.state == CatalogRecord.STATE_PUBLISHED:
@@ -322,6 +352,7 @@ def to_representation(self, instance):
if "request" in self.context and "file_details" in self.context["request"].query_params:
CRS.populate_file_details(res, self.context["request"])

res = self._filter_research_dataset_fields(res)
res = self._check_and_strip_sensitive_fields(instance, res)

return res
@@ -441,6 +472,7 @@ def validate_research_dataset(self, value):
self._validate_research_dataset_uniqueness(value)

CRS.validate_reference_data(value, cache)
self._validate_org_name_is_set(self.initial_data["research_dataset"])

return value

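The new _filter_research_dataset_fields helper above trims the serialized research_dataset down to the fields named in the research_dataset_fields query parameter. A minimal standalone sketch of that filtering, using a plain dict in place of the serializer output (the example values and query string are illustrative):

    # Illustrative response dict; only the filtering logic mirrors the serializer change.
    res = {
        "identifier": "abc-123",
        "research_dataset": {
            "title": {"en": "Example dataset"},
            "description": {"en": "An example description"},
            "creator": [{"name": "Example Person"}],
        },
    }

    # e.g. a request carrying ?research_dataset_fields=title,creator
    requested_fields = set("title,creator".split(","))

    filtered = {
        **res,
        "research_dataset": {
            key: value
            for key, value in res["research_dataset"].items()
            if key in requested_fields
        },
    }

    print(sorted(filtered["research_dataset"]))  # ['creator', 'title']
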
10 changes: 0 additions & 10 deletions src/metax_api/api/rest/base/views/common_view.py
@@ -174,7 +174,6 @@ def get_queryset(self):
"""
additional_filters = {}
q_filters = []
deduplicated_q_filters = []

if hasattr(self, "queryset_search_params"):
additional_filters.update(**self.queryset_search_params)
@@ -185,10 +184,6 @@
# Q-filter objects, which can contain more complex filter options such as OR-clauses
q_filters = additional_filters.pop("q_filters")

if "deduplicated_q_filters" in additional_filters:
# Q-filter objects that may produce duplicate results
deduplicated_q_filters = additional_filters.pop("deduplicated_q_filters")

if CS.get_boolean_query_param(self.request, "removed"):
additional_filters.update({"removed": True})
self.queryset = self.queryset_unfiltered
@@ -210,11 +205,6 @@
self.select_related = [rel for rel in self.select_related if rel in self.fields]

queryset = super().get_queryset()
if deduplicated_q_filters:
# run filters that may produce duplicates and deduplicate the results. deduplicating just the ids
# in a subquery is faster than deduplicating the full results when there are a lot of duplicates.
id_query = queryset.filter(*deduplicated_q_filters).values("id").distinct()
queryset = queryset.filter(id__in=id_query)
queryset = queryset.filter(*q_filters, **additional_filters)

if self.request.META["REQUEST_METHOD"] in WRITE_OPERATIONS:
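For context on the removed block: the deleted comment explains that when a filter can produce many duplicate rows, deduplicating only the ids in a subquery is cheaper than deduplicating the full result rows. A hedged sketch of that now-removed pattern on a generic Django queryset (the filter arguments are illustrative):

    def filter_with_id_deduplication(queryset, duplicate_prone_filters):
        # Pattern removed in this commit: apply duplicate-producing Q filters in an
        # id-only subquery, deduplicate there, then filter the main queryset by id.
        id_query = queryset.filter(*duplicate_prone_filters).values("id").distinct()
        return queryset.filter(id__in=id_query)

    # Usage (illustrative filter):
    # from django.db.models import Q
    # qs = filter_with_id_deduplication(CatalogRecord.objects.all(),
    #                                   [Q(files__project_identifier="project_x")])
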
21 changes: 21 additions & 0 deletions src/metax_api/api/rest/base/views/dataset_view.py
@@ -10,6 +10,7 @@

from django.conf import settings
from django.http import Http404
from django.db.models import Count

from rest_framework import status
from rest_framework.decorators import action
@@ -74,6 +75,26 @@ def get_queryset(self):
data_catalog__catalog_json__identifier__in=settings.LEGACY_CATALOGS
)

if self.request.META["REQUEST_METHOD"] == "GET":
# Optimize dataset listing by prefetching related objects.
# Annotate results with number of records in dataset_version_set
# to allow the serializer skip querying other versions when there
# is only one.
return (
super()
.get_queryset()
.prefetch_related(
"data_catalog",
"dataset_version_set",
"preservation_dataset_version",
"preservation_dataset_origin_version",
"next_draft",
"draft_of",
"editor_permissions",
)
.annotate(Count("dataset_version_set__records"))
)

return super().get_queryset()

def retrieve(self, request, *args, **kwargs):
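The annotation added above pairs with the serializer change earlier in this commit: annotating with Count("dataset_version_set__records") exposes a dataset_version_set__records__count attribute on every listed instance, so the serializer can skip the extra version-listing query when a dataset is the only record in its version set. A hedged sketch of that shape outside the view (not runnable without a configured Django project; the import path is assumed):

    from django.db.models import Count

    from metax_api.models import CatalogRecordV2  # assumed import path

    def annotated_listing_queryset():
        # Mirrors the GET-listing optimization in the dataset view's get_queryset():
        # prefetch commonly serialized relations and annotate the version count.
        return (
            CatalogRecordV2.objects.all()
            .prefetch_related("data_catalog", "dataset_version_set", "editor_permissions")
            .annotate(Count("dataset_version_set__records"))
        )

    # In the serializer, instance.dataset_version_set__records__count == 1 then means
    # the record is the only version, and dataset_version_set can be built from
    # instance.version_dict without another query.
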
9 changes: 6 additions & 3 deletions src/metax_api/api/rpc/v2/views/dataset_rpc.py
@@ -82,7 +82,8 @@ def create_draft(self, request):
data={"id": cr.next_draft.id, "identifier": cr.next_draft.identifier},
status=status.HTTP_201_CREATED,
)
except DatabaseError:
except DatabaseError as e:
_logger.error(f"DatabaseError: {e}")
return Response({'error': 'Failed to create draft'}, status=status.HTTP_400_BAD_REQUEST)

@action(detail=False, methods=["post"], url_path="create_new_version")
@@ -100,7 +101,8 @@ def create_new_version(self, request):
},
status=status.HTTP_201_CREATED,
)
except DatabaseError:
except DatabaseError as e:
_logger.error(f"DatabaseError: {e}")
return Response({'error': 'Failed to create a new version'}, status=status.HTTP_400_BAD_REQUEST)

@action(detail=False, methods=["post"], url_path="publish_dataset")
@@ -115,7 +117,8 @@ def publish_dataset(self, request):
data={"preferred_identifier": cr.preferred_identifier},
status=status.HTTP_200_OK,
)
except DatabaseError:
except DatabaseError as e:
_logger.error(f"DatabaseError: {e}")
return Response({'error': 'Failed to publish dataset'}, status=status.HTTP_400_BAD_REQUEST)

@action(detail=False, methods=["post"], url_path="merge_draft")
10 changes: 6 additions & 4 deletions src/metax_api/management/commands/create_statistic_report.py
@@ -28,11 +28,13 @@ def handle(self, *args, **options):
file_pids = ret[1]

if len(file_pids) == 0:
catalog_records = ""
published_catalog_record_pids = ""
else:
catalog_records = FileService.get_identifiers(file_pids, "noparams", True, get_pids=True).data
all_catalog_records = FileService.get_identifiers(file_pids, "noparams", True).data
published_catalog_records = CatalogRecordV2.objects.filter(identifier__in = all_catalog_records, state = "published")
published_catalog_record_pids = list(published_catalog_records.values_list('research_dataset__preferred_identifier', flat = True).distinct())

stat = ProjectStatistics(project_id, count, size, catalog_records)
stat = ProjectStatistics(project_id, count, size, published_catalog_record_pids)
stat.save()


@@ -44,4 +46,4 @@
stat = OrganizationStatistics(org_id, ret["count"], ret["ida_byte_size"])
stat.save()

logger.info("Statistic summary created")
logger.info("Statistic summary created")
