This repository has been archived by the owner on Sep 16, 2022. It is now read-only.

Merge stable to master #494

Merged
merged 29 commits into master from stable on Sep 2, 2019
Changes from all commits
29 commits
8359b4c
CSCMETAX-610: [ADD] allowed_projects query param for single files
Jul 3, 2019
0a0fcbd
CSCMETAX-610: [ADD|FIX] Added tests and fixed typos
Jul 4, 2019
6c923aa
CSCMETAX-610: [FIX] More generic description in swagger
Jul 9, 2019
f0f6461
Merge branch 'test' into CSCMETAX-610-new-query-param-for-single-update
tompulli Jul 9, 2019
1177e62
CSCMETAX-620: [ADD|REF] implemented cache and refined error handling
Jul 11, 2019
7b6a49b
CSCMETAX-569: [ADD] path filter to be used with project identifier fi…
Jul 23, 2019
bc1f536
CSCMETAX-610: [FIX|REF] Fixed multiple get_object calls
Jul 30, 2019
74ff413
CSCMETAX-572: [ADD] actor_filter for dataset api
Aug 7, 2019
7ab0d23
Merge branch 'test' into CSCMETAX-620-cache-user_projects
hannu40k Aug 15, 2019
4f8a162
CSCMETAX-572: [FIX] clearer description to swagger
Aug 20, 2019
aabc5e0
Merge branch 'test' into CSCMETAX-572-organization-filters-for-datase…
tompulli Aug 20, 2019
6c432ae
Merge branch 'test' into CSCMETAX-569-file_path-filter-for-file-api
Aug 20, 2019
399ed50
Merge pull request #482 from CSCfi/CSCMETAX-572-organization-filters-…
hannu40k Aug 20, 2019
745acff
CSCMETAX-569: [FIX] Refined error handling and testing
Aug 21, 2019
d12172e
Merge branch 'test' into CSCMETAX-569-file_path-filter-for-file-api
tompulli Aug 21, 2019
18efa77
Merge pull request #480 from CSCfi/CSCMETAX-569-file_path-filter-for-…
hannu40k Aug 21, 2019
b5dc5ff
Merge branch 'test' into CSCMETAX-610-new-query-param-for-single-update
tompulli Aug 21, 2019
f1571e5
CSCMETAX-610: [FIX] more general error messages
Aug 21, 2019
b75dbf9
Merge branch 'test' into CSCMETAX-620-cache-user_projects
Aug 21, 2019
c0297da
CSCMETAX-620: [FIX] refined error handling
Aug 21, 2019
6f510a0
Merge pull request #466 from CSCfi/CSCMETAX-610-new-query-param-for-s…
hannu40k Aug 21, 2019
be89f9a
Merge branch 'test' into CSCMETAX-620-cache-user_projects
hannu40k Aug 21, 2019
733bc14
CSCMETAX-640: [ADD/FIX] documented the statistics api and fixed
Aug 30, 2019
0b4a267
Merge branch 'test' into CSCMETAX-640-statistics-api-fix
tompulli Aug 30, 2019
827b2aa
CSCMETAX-640: [REM] commented out unused apis from swagger
Aug 30, 2019
c6b616a
Merge pull request #491 from CSCfi/CSCMETAX-640-statistics-api-fix
hannu40k Aug 30, 2019
b64b425
Merge branch 'test' into CSCMETAX-620-cache-user_projects
hannu40k Sep 2, 2019
e1ae8f2
Merge pull request #475 from CSCfi/CSCMETAX-620-cache-user_projects
hannu40k Sep 2, 2019
816c3a9
Merge pull request #492 from CSCfi/test
hannu40k Sep 2, 2019
58 changes: 32 additions & 26 deletions src/metax_api/api/rest/base/views/file_view.py
@@ -17,7 +17,7 @@
from rest_framework.renderers import JSONRenderer
from rest_framework.response import Response

from metax_api.exceptions import Http400
from metax_api.exceptions import Http400, Http403
from metax_api.models import File, XmlMetadata
from metax_api.renderers import XMLRenderer
from metax_api.services import AuthService, CommonService, FileService
@@ -92,22 +92,40 @@ def list(self, request, *args, **kwargs):
self.queryset_search_params['project_identifier__in'] = user_projects
return super().list(request, *args, **kwargs)

def update(self, request, *args, **kwargs):
# This has to be checked before updating common info
if not isinstance(self.request.data, dict):
raise Http400('request message body must be a single json object')

return super().update(request, *args, **kwargs)

def partial_update(self, request, *args, **kwargs):
# This has to be checked before updating common info
if not isinstance(self.request.data, dict):
raise Http400('request message body must be a single json object')

return super().partial_update(request, *args, **kwargs)

def get_object(self, search_params=None):
"""
Deals with the allowed_projects query parameter. This is done here to avoid multiple
get_object calls in a single request.
"""
obj = super().get_object(search_params)
if self.request.user.is_service:
allowed_projects = CommonService.get_list_query_param(self.request, 'allowed_projects')
if allowed_projects is not None and obj.project_identifier not in allowed_projects:
raise Http403('You do not have permission to update this file')

return obj

def update_bulk(self, request, *args, **kwargs):
"""
Checks that all files belong to a project listed in the allowed_projects query parameter,
if given.
"""
allowed_projects = CommonService.get_list_query_param(request, 'allowed_projects')

if allowed_projects is not None:
if not isinstance(request.data, list):
return Response(data={ 'detail': 'request.data is not a list'}, status=status.HTTP_400_BAD_REQUEST)

file_ids = [f['identifier'] for f in request.data]

if not FileService.verify_allowed_projects(allowed_projects, file_identifiers=file_ids):
return Response(data={"detail": "You do not have permission to update these files"},
status=status.HTTP_403_FORBIDDEN)
if self.request.user.is_service:
FileService.check_allowed_projects(request)

return super().update_bulk(request, *args, **kwargs)

@@ -116,20 +134,8 @@ def partial_update_bulk(self, request, *args, **kwargs):
Checks that all files belong to a project listed in the allowed_projects query parameter,
if given.
"""
allowed_projects = CommonService.get_list_query_param(request, 'allowed_projects')

if allowed_projects is not None:
if not isinstance(request.data, list):
return Response(data={ 'detail': 'request.data is not a list'}, status=status.HTTP_400_BAD_REQUEST)
try:
file_ids = [f['identifier'] for f in request.data]
except KeyError:
return Response(data={"detail": "File identifier is missing"},
status=status.HTTP_400_BAD_REQUEST)

if not FileService.verify_allowed_projects(allowed_projects, file_identifiers=file_ids):
return Response(data={"detail": "You do not have permission to update these files"},
status=status.HTTP_403_FORBIDDEN)
if self.request.user.is_service:
FileService.check_allowed_projects(request)

return super().partial_update_bulk(request, *args, **kwargs)

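As a rough usage sketch of the new allowed_projects check for single-file updates (hypothetical host, credentials and identifiers, not values from this PR), a service user could restrict its own update to files in specific projects; files outside those projects are rejected with 403:

import requests

# Hedged sketch only: endpoint, credentials and identifiers below are placeholders.
resp = requests.patch(
    'https://metax.example.org/rest/files/pid:urn:example:file:1',
    params={'allowed_projects': 'project_x,project_y'},   # comma-separated list
    json={'file_characteristics': {'encoding': 'UTF-8'}},
    auth=('service_user', 'password'),
)
# 403 is returned if the file's project_identifier is not among allowed_projects
print(resp.status_code)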
19 changes: 19 additions & 0 deletions src/metax_api/api/rpc/base/views/statistic_rpc.py
@@ -11,6 +11,7 @@
from rest_framework.response import Response

from .common_rpc import CommonRPC
from metax_api.exceptions import Http400
from metax_api.services import StatisticService, CommonService as CS


@@ -21,6 +22,9 @@ class StatisticRPC(CommonRPC):

@list_route(methods=['get'], url_path='all_datasets_cumulative')
def all_datasets_cumulative(self, request):
if not request.query_params.get('from_date', None) or not request.query_params.get('to_date', None):
raise Http400('from_date and to_date parameters are required')

params = {
'from_date': request.query_params.get('from_date', None),
'to_date': request.query_params.get('to_date', None),
@@ -29,6 +33,9 @@ def all_datasets_cumulative(self, request):

@list_route(methods=['get'], url_path='catalog_datasets_cumulative')
def catalog_datasets_cumulative(self, request):
if not request.query_params.get('from_date', None) or not request.query_params.get('to_date', None):
raise Http400('from_date and to_date parameters are required')

params = {
'from_date': request.query_params.get('from_date', None),
'to_date': request.query_params.get('to_date', None),
@@ -59,6 +66,9 @@ def count_datasets(self, request):

@list_route(methods=['get'], url_path='deprecated_datasets_cumulative')
def deprecated_datasets_cumulative(self, request):
if not request.query_params.get('from_date', None) or not request.query_params.get('to_date', None):
raise Http400('from_date and to_date parameters are required')

params = {
'from_date': request.query_params.get('from_date', None),
'to_date': request.query_params.get('to_date', None),
@@ -67,6 +77,9 @@ def deprecated_datasets_cumulative(self, request):

@list_route(methods=['get'], url_path='end_user_datasets_cumulative')
def end_user_datasets_cumulative(self, request):
if not request.query_params.get('from_date', None) or not request.query_params.get('to_date', None):
raise Http400('from_date and to_date parameters are required')

params = {
'from_date': request.query_params.get('from_date', None),
'to_date': request.query_params.get('to_date', None),
@@ -75,6 +88,9 @@ def end_user_datasets_cumulative(self, request):

@list_route(methods=['get'], url_path='harvested_datasets_cumulative')
def harvested_datasets_cumulative(self, request):
if not request.query_params.get('from_date', None) or not request.query_params.get('to_date', None):
raise Http400('from_date and to_date parameters are required')

params = {
'from_date': request.query_params.get('from_date', None),
'to_date': request.query_params.get('to_date', None),
@@ -83,6 +99,9 @@ def harvested_datasets_cumulative(self, request):

@list_route(methods=['get'], url_path='organization_datasets_cumulative')
def organization_datasets_cumulative(self, request):
if not request.query_params.get('from_date', None) or not request.query_params.get('to_date', None):
raise Http400('from_date and to_date parameters are required')

params = {
'from_date': request.query_params.get('from_date', None),
'to_date': request.query_params.get('to_date', None),
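A quick sketch of the tightened validation on the cumulative statistics endpoints (hypothetical host, path and date values): both from_date and to_date are now required, and a missing parameter yields a 400:

import requests

BASE = 'https://metax.example.org/rpc/statistics'  # placeholder host and path

# Both parameters present -> normal response
ok = requests.get(f'{BASE}/all_datasets_cumulative',
                  params={'from_date': '2019-01', 'to_date': '2019-08'})

# Missing to_date -> Http400: 'from_date and to_date parameters are required'
bad = requests.get(f'{BASE}/all_datasets_cumulative',
                   params={'from_date': '2019-01'})
print(ok.status_code, bad.status_code)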
22 changes: 13 additions & 9 deletions src/metax_api/services/auth_service.py
@@ -21,33 +21,37 @@ def get_user_projects(request):
"""
Fetches the user's file projects from a local file and from the
token. In the local file, the values must be lists of strings.
Projects are cached to request.user.user_projects to improve performance.
"""

if hasattr(request.user, 'user_projects'):
return request.user.user_projects

user_projects = AuthService.extract_file_projects_from_token(request.user.token)

if request.user.token is None:
raise Http404

username = request.user.token.get('CSCUserName', '')
file_projects = None

try:
with open(settings.ADDITIONAL_USER_PROJECTS_PATH, 'r') as file:
file_projects = json.load(file)
except FileNotFoundError:
_logger.info("No local file for user projects")
return user_projects
except Exception as e:
_logger.error(e)
return user_projects

try:
if isinstance(file_projects[username], list) and isinstance(file_projects[username][0], str):
for project in file_projects[username]:
user_projects.add(project)
else:
if file_projects:
if not file_projects.get(username, False):
_logger.info("No projects for user '%s' on local file" % username)
elif not isinstance(file_projects[username], list) or not isinstance(file_projects[username][0], str):
_logger.error("Projects on file are not list of strings")
except:
_logger.info("No projects for user '%s' on local file" % username)
else:
user_projects.update(p for p in file_projects[username])

request.user.user_projects = user_projects
return user_projects

@staticmethod
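For illustration, the file referenced by settings.ADDITIONAL_USER_PROJECTS_PATH is expected to be a JSON object mapping usernames to lists of project identifier strings; a minimal sketch with made-up names and values:

import json

# Hypothetical content for the additional user projects file; each value
# must be a list of strings, as checked in get_user_projects above.
example = {
    'teppo_testaaja': ['project_x', '2001036'],
    'another_user': ['project_y'],
}

with open('/tmp/additional_user_projects.json', 'w') as f:
    json.dump(example, f)

# get_user_projects() unions these with the projects parsed from the user's
# token and caches the result on request.user.user_projects.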
61 changes: 61 additions & 0 deletions src/metax_api/services/catalog_record_service.py
@@ -101,12 +101,73 @@ def get_queryset_search_params(cls, request):
if request.query_params.get('pas_filter', False):
cls.set_pas_filter(queryset_search_params, request)

if CommonService.has_research_agent_query_params(request):
cls.set_actor_filters(queryset_search_params, request)

if request.query_params.get('data_catalog', False):
queryset_search_params['data_catalog__catalog_json__identifier__iregex'] = \
request.query_params['data_catalog']

return queryset_search_params

@staticmethod
def set_actor_filters(queryset_search_params, request):
"""
Set complex queries for filtering datasets by creator, curator, publisher and/or rights_holder.
The 'condition_separator' parameter defines whether these conditions are OR'ed or AND'ed together (default: AND).
Q-filters from multiple queries are eventually AND'ed together.
"""
def _get_person_filter(agent, person):
name_filter = Q()
# only one publisher possible
if agent == 'publisher':
name_filter |= Q(**{ f'research_dataset__{agent}__name__iregex': person })
else:
# having same problem as in set_pas_filter below..
for i in range(3):
name_filter |= Q(**{ f'research_dataset__{agent}__{i}__name__iregex': person })

name_filter |= Q(**{ f'research_dataset__{agent}__contains': [{ 'name': person }] })

# the name regex would also match organization name fields, so additionally require @type Person
person_filter = Q(**{ f'research_dataset__{agent}__contains': [{ '@type': "Person" }] })
name_filter.add(person_filter, 'AND')

return name_filter

def _get_org_filter(agent, org):
name_filter = Q()
# only one publisher possible
if agent == 'publisher':
name_filter |= (Q(**{ f'research_dataset__{agent}__name__en__iregex': org }))
name_filter |= (Q(**{ f'research_dataset__{agent}__name__fi__iregex': org }))
else:
for i in range(3):
name_filter |= (Q(**{ f'research_dataset__{agent}__{i}__name__en__iregex': org }))
name_filter |= (Q(**{ f'research_dataset__{agent}__{i}__name__fi__iregex': org }))

name_filter |= (Q(**{ f'research_dataset__{agent}__contains': [{ 'name': {'en': org} }] }))
name_filter |= (Q(**{ f'research_dataset__{agent}__contains': [{ 'name': {'fi': org} }] }))

return name_filter

q_filter = Q()
separator = 'OR' if request.query_params.get('condition_separator', '').upper() == 'OR' else 'AND'

for agent in ['creator', 'curator', 'publisher', 'rights_holder']:
if request.query_params.get(f'{agent}_person'):
person = urllib.parse.unquote(request.query_params[f'{agent}_person'])
q_filter.add(_get_person_filter(agent, person), separator)

if request.query_params.get(f'{agent}_organization'):
org = urllib.parse.unquote(request.query_params[f'{agent}_organization'])
q_filter.add(_get_org_filter(agent, org), separator)

if 'q_filters' in queryset_search_params: # pragma: no cover
queryset_search_params['q_filters'].append(q_filter)
else:
queryset_search_params['q_filters'] = [q_filter]

@staticmethod
def set_pas_filter(queryset_search_params, request):
"""
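A hedged usage sketch of the new actor filters (hypothetical host and names): datasets can be filtered by person or organization for creator, curator, publisher and rights_holder, and condition_separator=OR combines the conditions with OR instead of the default AND:

import requests

# Placeholders throughout; parameter names follow set_actor_filters above.
resp = requests.get(
    'https://metax.example.org/rest/datasets',
    params={
        'creator_person': 'Teppo Testaaja',            # case-insensitive regex match
        'publisher_organization': 'Helsingin yliopisto',
        'condition_separator': 'OR',                   # default is AND
    },
)
print(resp.status_code)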
15 changes: 15 additions & 0 deletions src/metax_api/services/common_service.py
@@ -94,6 +94,21 @@ def get_list_query_param(request, param_name):

return set()

@staticmethod
def has_research_agent_query_params(request):
"""
Determines whether the request has query parameters for creator, curator, publisher or rights_holder,
for example 'creator_person' or 'publisher_organization'.
Returns a boolean.
"""
fields = ['creator', 'curator', 'publisher', 'rights_holder']
types = ['organization', 'person']
for field in fields:
if any(request.query_params.get(f'{field}_{type}') for type in types):
return True

return False

@classmethod
def create_bulk(cls, request, serializer_class, **kwargs):
"""
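As a small sketch of what the helper recognizes, any one of the eight field/type parameter combinations enables the actor filtering:

# The eight query parameter names checked by has_research_agent_query_params
fields = ['creator', 'curator', 'publisher', 'rights_holder']
types = ['organization', 'person']
print([f'{field}_{type_}' for field in fields for type_ in types])
# ['creator_organization', 'creator_person', 'curator_organization', ...]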
27 changes: 20 additions & 7 deletions src/metax_api/services/file_service.py
@@ -79,6 +79,11 @@ def get_queryset_search_params(cls, request):
cls.check_user_belongs_to_project(request, project)
queryset_search_params['project_identifier'] = project

if request.query_params.get('file_path', False):
if not request.query_params.get('project_identifier', False):
raise Http400('query parameter project_identifier is required when using file_path filter')
queryset_search_params['file_path__contains'] = request.query_params['file_path']

return queryset_search_params

@classmethod
@@ -884,17 +889,25 @@ def _create_single(cls, common_info, initial_data, serializer_class, **kwargs):
return res

@classmethod
def verify_allowed_projects(cls, allowed_projects, file_identifiers=[]):
if file_identifiers:
def check_allowed_projects(cls, request):
allowed_projects = CommonService.get_list_query_param(request, 'allowed_projects')

if allowed_projects is not None:
if not isinstance(request.data, list):
raise Http400({ 'detail': [ 'request message body must be a list of json objects' ] })

try:
file_ids = [f['identifier'] for f in request.data]
except KeyError:
raise Http400({ 'detail': [ 'File identifier is missing' ] })

project_ids = [ pid for pid in File.objects
.filter(identifier__in=file_identifiers)
.filter(identifier__in=file_ids)
.values_list('project_identifier', flat=True)
.distinct('project_identifier') ]

if all(pid in allowed_projects for pid in project_ids):
return True

return False
if not all(pid in allowed_projects for pid in project_ids):
raise Http403({ 'detail': [ 'You do not have permission to update this file' ] })

@classmethod
def _create_bulk(cls, common_info, initial_data_list, results, serializer_class, **kwargs):
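A rough sketch of the new file_path filter (hypothetical host, project and paths): it performs a substring match on stored file paths and must be combined with project_identifier, otherwise the request fails with 400:

import requests

# Placeholders; file_path maps to a file_path__contains lookup.
ok = requests.get('https://metax.example.org/rest/files',
                  params={'project_identifier': 'project_x',
                          'file_path': '/data/2019/'})

# Omitting project_identifier -> 400:
# 'query parameter project_identifier is required when using file_path filter'
bad = requests.get('https://metax.example.org/rest/files',
                   params={'file_path': '/data/2019/'})
print(ok.status_code, bad.status_code)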
8 changes: 4 additions & 4 deletions src/metax_api/services/statistic_service.py
@@ -52,7 +52,7 @@ def count_datasets(cls,
sql = '''
SELECT
count(cr.id) AS count,
COALESCE(SUM(COALESCE((research_dataset->>'total_files_byte_size')::int, 0)), 0) AS ida_byte_size
COALESCE(SUM(COALESCE((research_dataset->>'total_files_byte_size')::bigint, 0)), 0) AS ida_byte_size
from metax_api_catalogrecord as cr
join metax_api_datacatalog as dc on dc.id = cr.data_catalog_id
where 1=1
@@ -141,7 +141,7 @@ def total_datasets(cls, from_date, to_date):
WITH cte AS (
SELECT
date_trunc('month', cr.date_created) AS mon,
SUM(COALESCE((cr.research_dataset->>'total_files_byte_size')::int, 0)) AS mon_ida_byte_size
SUM(COALESCE((cr.research_dataset->>'total_files_byte_size')::bigint, 0)) AS mon_ida_byte_size
FROM metax_api_catalogrecord cr
GROUP BY mon
)
@@ -219,7 +219,7 @@ def _total_data_catalog_datasets(cls, from_date, to_date, access_types, dc_id):
WITH cte AS (
SELECT
date_trunc('month', cr.date_created) AS mon,
SUM(COALESCE((cr.research_dataset->>'total_files_byte_size')::int, 0)) AS mon_ida_byte_size
SUM(COALESCE((cr.research_dataset->>'total_files_byte_size')::bigint, 0)) AS mon_ida_byte_size
FROM metax_api_catalogrecord cr
JOIN metax_api_datacatalog as dc on dc.id = cr.data_catalog_id
where dc.id = %s
@@ -314,7 +314,7 @@ def _total_organization_datasets(cls, from_date, to_date, metadata_owner_org):
WITH cte AS (
SELECT
date_trunc('month', cr.date_created) AS mon,
SUM(COALESCE((cr.research_dataset->>'total_files_byte_size')::int, 0)) AS mon_ida_byte_size
SUM(COALESCE((cr.research_dataset->>'total_files_byte_size')::bigint, 0)) AS mon_ida_byte_size
FROM metax_api_catalogrecord cr
JOIN metax_api_datacatalog as dc on dc.id = cr.data_catalog_id
where dc.id = %s
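For context on the int to bigint change (made-up numbers): the byte-size field is cast per record before summing, and a 32-bit cast breaks as soon as a single dataset exceeds roughly 2 GiB:

# PostgreSQL's int is 32-bit signed; a dataset whose total_files_byte_size
# exceeds ~2 GiB would make the ::int cast fail, hence the switch to ::bigint.
INT4_MAX = 2**31 - 1                   # 2_147_483_647
total_files_byte_size = 3 * 1024**3    # a hypothetical 3 GiB dataset
assert total_files_byte_size > INT4_MAX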
9 changes: 8 additions & 1 deletion src/metax_api/settings.py
@@ -125,7 +125,14 @@
"delete_project": { "use": ["testuser", "metax"] }
},
"statistics": {
"something": { "use": ["all"] }
"count_datasets": { "use": ["all"] },
"all_datasets_cumulative": { "use": ["all"] },
"catalog_datasets_cumulative": { "use": ["all"] },
"end_user_datasets_cumulative": { "use": ["all"] },
"harvested_datasets_cumulative": { "use": ["all"] },
"deprecated_datasets_cumulative": { "use": ["all"] },
"organization_datasets_cumulative": { "use": ["all"] },
"unused_files": { "use": ["all"] },
}
}
}