From f3c1babfc50a8b6f685425145ffa70d0d7b22804 Mon Sep 17 00:00:00 2001 From: Trent Smith <1429913+Bento007@users.noreply.github.com> Date: Wed, 18 Oct 2017 09:59:32 -0700 Subject: [PATCH] Multiple result views supported for search (#564) Added an optional `format` query parameter to post search. When format=raw, each result also returns the bundle's indexed meta data (the Elasticsearch hit `_source`) under the `meta_data` key. https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-body.html - added a description of `format` to dss-api.yml. Added a smoketest case that uses `--format raw`. - updated tests/test_search.py to validate the new raw format --- dss-api.yml | 6 ++++++ dss/api/search.py | 22 ++++++++++++++++------ tests/smoketest.py | 2 +- tests/test_search.py | 14 ++++++++++++++ 4 files changed, 37 insertions(+), 7 deletions(-) diff --git a/dss-api.yml b/dss-api.yml index 19910fcd84..307accf73d 100644 --- a/dss-api.yml +++ b/dss-api.yml @@ -48,6 +48,12 @@ paths: type: object required: - es_query + - name: format + in: query + description: Specifies the output format. `raw` will return the json meta data in the results. + required: false + type: string + enum: [raw] - name: replica in: query description: Replica to search. diff --git a/dss/api/search.py b/dss/api/search.py index df6a059126..084464b6c0 100644 --- a/dss/api/search.py +++ b/dss/api/search.py @@ -22,7 +22,11 @@ def check(cls, n): @dss_handler -def post(json_request_body: dict, replica: str, per_page: int, _scroll_id: typing.Optional[str] = None) -> dict: +def post(json_request_body: dict, + replica: str, + per_page: int, + _scroll_id: typing.Optional[str] = None, + format: typing.Optional[str] = None) -> dict: es_query = json_request_body['es_query'] get_logger().debug("Received posted query. 
Replica: %s Query: %s Per_page: %i Timeout: %s Scroll_id: %s", replica, json.dumps(es_query, indent=4), per_page, _scroll_id) @@ -63,14 +67,20 @@ def post(json_request_body: dict, replica: str, per_page: int, _scroll_id: typin # TODO: (tsmith12) allow users to retrieve previous search results _scroll_id = page['_scroll_id'] - result_list = [{ - 'bundle_id': hit['_id'], - 'bundle_url': _build_bundle_url(hit, replica), - 'search_score': hit['_score'] - } for hit in page['hits']['hits']] + + result_list = [] # type: typing.List[dict] + for hit in page['hits']['hits']: + result = {'bundle_id': hit['_id'], + 'bundle_url': _build_bundle_url(hit, replica), + 'search_score': hit['_score'] + } + if format == 'raw': + result['meta_data'] = hit['_source'] + result_list.append(result) # TODO: (tsmith12) if page returns 0 hits, then all results have been found. delete search id request_body = jsonify({'es_query': es_query, 'results': result_list, 'total_hits': page['hits']['total']}) + if len(result_list) < per_page: response = make_response(request_body, requests.codes.ok) else: diff --git a/tests/smoketest.py b/tests/smoketest.py index ac411562e5..4f91dda0d0 100755 --- a/tests/smoketest.py +++ b/tests/smoketest.py @@ -69,7 +69,7 @@ def run(command, runner=check_call, **kwargs): run("hca dss download --replica gcp $(jq -r .bundle_uuid upload.json)") for replica in "aws", "gcp": - run(f"hca dss post-search --es-query='{{}}' --replica {replica}") + run(f"hca dss post-search --es-query='{{}}' --format raw --replica {replica}") search_route = "https://${API_HOST}/v1/search" for replica in "aws", "gcp": diff --git a/tests/test_search.py b/tests/test_search.py index 7798e6197c..1a3a5834f8 100755 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -244,6 +244,20 @@ def test_page_has_N_results_when_per_page_is_N(self): next_url = self.get_next_url(search_obj.response.headers) self.verify_next_url(next_url, per_page) + def test_format_is_raw(self): + bundles = 
self.populate_search_index(self.index_document, 1) + self.check_count(smartseq2_paired_ends_query, 1) + url = self.build_url(url_params={'format': 'raw'}) + search_obj = self.assertPostResponse( + path=url, + json_request_body=dict(es_query=smartseq2_paired_ends_query), + expected_code=requests.codes.ok) + next_url = self.get_next_url(search_obj.response.headers) + self.assertIsNone(next_url) + self.verify_search_result(search_obj.json, smartseq2_paired_ends_query, 1, 1) + self.verify_bundles(search_obj.json['results'], bundles) + self.assertEqual(search_obj.json['results'][0]['meta_data'], self.index_document) + def test_error_returned_when_per_page_is_out_of_range(self): expected_error = ExpectedErrorFields(code="illegal_arguments", status=requests.codes.bad_request,