From 47a23a888ed614178e1ada94d6b344fe0510d394 Mon Sep 17 00:00:00 2001
From: Yajing Tang
Date: Fri, 22 Feb 2019 17:09:09 -0600
Subject: [PATCH] feat(datasets): add datasets endpoint (#111)

---
 .travis.yml                                |  2 +-
 openapis/swagger.yaml                      | 24 +++++++++
 peregrine/api.py                           |  3 ++
 peregrine/blueprints/datasets.py           | 62 ++++++++++++++++++++++
 peregrine/resources/submission/__init__.py | 18 +++++++
 run_tests.sh                               |  2 +-
 tests/conftest.py                          |  8 +++
 tests/graphql/test_datasets.py             | 47 ++++++++++++++++
 8 files changed, 164 insertions(+), 2 deletions(-)
 create mode 100644 peregrine/blueprints/datasets.py
 create mode 100644 tests/graphql/test_datasets.py

diff --git a/.travis.yml b/.travis.yml
index c3a693bf..b8e879ca 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -35,7 +35,7 @@ before_script:
 
 # command to run tests
 script:
-  - py.test -vv --cov=peregrine --cov-report xml tests/system_test.py tests/graphql/test_graphql.py
+  - py.test -vv --cov=peregrine --cov-report xml tests/system_test.py tests/graphql/test_graphql.py tests/graphql/test_datasets.py
 
 after_script:
   - python-codacy-coverage -r coverage.xml
diff --git a/openapis/swagger.yaml b/openapis/swagger.yaml
index d0191b48..efce733c 100644
--- a/openapis/swagger.yaml
+++ b/openapis/swagger.yaml
@@ -19,6 +19,30 @@ tags:
   - name: system
     description: System endpoints
 paths:
+  /datasets:
+    get:
+      tags:
+        - datasets
+      summary: Get counts for nodes for each project
+      parameters:
+        - in: query
+          name: nodes
+          description: comma-delimited nodes to get counts for
+          schema:
+            type: string
+      responses:
+        '200':
+          description: node counts for each project
+          content:
+            application/json:
+              schema:
+                type: object
+              example: {project_A: {case: 0, aliquot: 1}, project_B: {case: 2, aliquot: 3}}
+        '401':
+          description: unauthorized request
+        '400':
+          description: invalid request
   /graphql:
     post:
       tags:
diff --git a/peregrine/api.py b/peregrine/api.py
index e3ef9fe2..4e03aa60 100644
--- a/peregrine/api.py
+++ b/peregrine/api.py
@@ -15,6 +15,7 @@
 
 import peregrine
 from peregrine import dictionary
+from peregrine.blueprints import datasets
 from .errors import APIError, setup_default_handlers, UnhealthyCheck
 from .resources import submission
 from .version_data import VERSION, COMMIT, DICTVERSION, DICTCOMMIT
@@ -32,12 +33,14 @@ def app_register_blueprints(app):
     app.url_map.strict_slashes = False
 
     app.register_blueprint(peregrine.blueprints.blueprint, url_prefix=v0+'/submission')
+    app.register_blueprint(datasets.blueprint, url_prefix=v0+'/datasets')
 
 
 def app_register_duplicate_blueprints(app):
     # TODO: (jsm) deprecate this v0 version under root endpoint.  This
     # root endpoint duplicates /v0 to allow gradual client migration
     app.register_blueprint(peregrine.blueprints.blueprint, url_prefix='/submission')
+    app.register_blueprint(datasets.blueprint, url_prefix='/datasets')
 
 
 def async_pool_init(app):
diff --git a/peregrine/blueprints/datasets.py b/peregrine/blueprints/datasets.py
new file mode 100644
index 00000000..f755dfe4
--- /dev/null
+++ b/peregrine/blueprints/datasets.py
@@ -0,0 +1,62 @@
+import flask
+import os
+import re
+
+from peregrine.resources.submission import (
+    graphql,
+    set_read_access_projects_for_public_endpoint,
+    set_read_access_projects,
+)
+
+from cdiserrors import UserError, AuthZError
+
+blueprint = flask.Blueprint("datasets", "datasets")
+
+
+@blueprint.route("/", methods=["GET"])
+def get_datasets():
+    """
+    Get dataset-level summary counts. If the deployment sets the
+    PUBLIC_DATASETS environment variable to "true", this endpoint
+    is open to anonymous users.
+    """
+    nodes = flask.request.args.get("nodes", "")
+    nodes = nodes.split(",")
+    if not nodes:
+        raise UserError("Need to provide target nodes in query param")
+    if os.environ.get("PUBLIC_DATASETS", False) == "true":
+        set_read_access_projects_for_public_endpoint()
+    else:
+        set_read_access_projects()
+    projects = flask.g.read_access_projects
+    if not projects:
+        raise AuthZError("You are not authorized to access any projects")
+    # construct a single query that gets counts for all projects;
+    # because graphql can't group results by project, each count is
+    # labeled with its project index and later parsed with a regex
+    # to add structure to the response
+    query = "{"
+    for i, project_id in enumerate(projects):
+        query += (
+            " ".join(
+                map(
+                    lambda x: """i{i}_{node}: _{node}_count(project_id: "{p}")""".format(
+                        i=i, node=x, p=project_id
+                    ),
+                    nodes,
+                )
+            )
+            + " "
+        )
+    query += "}"
+    data, errors = graphql.execute_query(query, variables={})
+    if errors:
+        return flask.jsonify({"data": data, "errors": errors}), 400
+    result = {project_id: {} for project_id in projects}
+
+    for name, value in data.iteritems():
+        match = re.search("^i(\d)_(.*)", name)
+        index = int(match.group(1))
+        node = match.group(2)
+        result[projects[index]][node] = value
+    return flask.jsonify(result)
diff --git a/peregrine/resources/submission/__init__.py b/peregrine/resources/submission/__init__.py
index 99f7d12b..b120bdc5 100644
--- a/peregrine/resources/submission/__init__.py
+++ b/peregrine/resources/submission/__init__.py
@@ -43,6 +43,24 @@ def get_open_project_ids():
         for program in project['programs']
     ]
 
+def set_read_access_projects_for_public_endpoint():
+    """
+    Set the global user project list to include all projects, for endpoints
+    that don't need authorization.
+    """
+
+    with flask.current_app.db.session_scope():
+        projects = (
+            flask.current_app.db
+            .nodes(models.Project)
+            .all()
+        )
+        flask.g.read_access_projects = [
+            program['name'] + '-' + project['code']
+            for project in projects
+            for program in project['programs']
+        ]
+
 
 def set_read_access_projects():
     """
diff --git a/run_tests.sh b/run_tests.sh
index 1d384f0c..5b8efaa6 100755
--- a/run_tests.sh
+++ b/run_tests.sh
@@ -11,4 +11,4 @@ userdatamodel-init --db test_userapi
 python bin/setup_test_database.py
 mkdir -p tests/resources/keys; cd tests/resources/keys; openssl genrsa -out test_private_key.pem 2048; openssl rsa -in test_private_key.pem -pubout -out test_public_key.pem; cd -
 
-py.test -vv --cov=peregrine --cov-report xml tests/system_test.py tests/graphql/test_graphql.py
+py.test -vv --cov=peregrine --cov-report xml tests/system_test.py tests/graphql/test_graphql.py tests/graphql/test_datasets.py
diff --git a/tests/conftest.py b/tests/conftest.py
index dc447b86..8d807416 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -238,3 +238,11 @@ def es_teardown():
         es.indices.refresh(index=INDEX)
 
     json_data.close()
+
+
+@pytest.fixture
+def public_dataset_api(request):
+    os.environ["PUBLIC_DATASETS"] = "true"
+    def tearDown():
+        os.environ["PUBLIC_DATASETS"] = "false"
+    request.addfinalizer(tearDown)
diff --git a/tests/graphql/test_datasets.py b/tests/graphql/test_datasets.py
new file mode 100644
index 00000000..f8788725
--- /dev/null
+++ b/tests/graphql/test_datasets.py
@@ -0,0 +1,47 @@
+from test_graphql import post_example_entities_together
+from datamodelutils import models
+import os
+
+
+def test_authorized_call_with_protected_config(
+    client, submitter, pg_driver_clean, cgci_blgsp
+):
+    post_example_entities_together(client, pg_driver_clean, submitter)
+    # there should be 5 cases; move all but the last 3 to another project
+    with pg_driver_clean.session_scope() as s:
+        cases = pg_driver_clean.nodes(models.Case).all()
+        case_count = len(cases)
+        for case in cases[0:-3]:
+            case.project_id = "OTHER-OTHER"
+            s.merge(case)
+    r = client.get("/datasets?nodes=case,aliquot", headers=submitter)
+    assert r.json.keys() == ["CGCI-BLGSP"]
+    assert r.json["CGCI-BLGSP"]["case"] == case_count - 2
+
+
+def test_anonymous_call_with_protected_config(client, pg_driver_clean, cgci_blgsp):
+    r = client.get("/datasets?nodes=case,aliquot")
+    assert r.status_code == 401
+
+
+def test_anonymous_call_with_public_config(
+    client, submitter, pg_driver_clean, cgci_blgsp, public_dataset_api
+):
+    post_example_entities_together(client, pg_driver_clean, submitter)
+    with pg_driver_clean.session_scope() as s:
+        project = models.Project("other", code="OTHER")
+        program = pg_driver_clean.nodes(models.Program).props(name="CGCI").first()
+        project.programs = [program]
+        s.add(project)
+        aliquot_count = pg_driver_clean.nodes(models.Aliquot).count()
+        cases = pg_driver_clean.nodes(models.Case).all()
+        case_count = len(cases)
+        for case in cases[0:-3]:
+            case.project_id = "CGCI-OTHER"
+            s.merge(case)
+
+    r = client.get("/datasets?nodes=case,aliquot")
+    assert r.json["CGCI-BLGSP"]["case"] == case_count - 2
+    assert r.json["CGCI-BLGSP"]["aliquot"] == aliquot_count
+    assert r.json["CGCI-OTHER"]["aliquot"] == 0
+    assert r.json["CGCI-OTHER"]["case"] == 2
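
For reference, a minimal sketch of how a client might call the new endpoint once this patch is deployed. The base URL, access token, and node names below are illustrative assumptions, not part of the patch; the response shape follows the swagger example above.

    import requests

    # Assumed deployment URL and token -- replace with real values.
    BASE_URL = "https://example-commons.org/api/v0"
    TOKEN = "<access token>"

    # Ask for per-project counts of the `case` and `aliquot` nodes.
    resp = requests.get(
        BASE_URL + "/datasets",
        params={"nodes": "case,aliquot"},
        headers={"Authorization": "Bearer " + TOKEN},
    )
    resp.raise_for_status()

    # Expected shape, per the swagger example:
    # {"project_A": {"case": 0, "aliquot": 1}, "project_B": {"case": 2, "aliquot": 3}}
    for project_id, counts in resp.json().items():
        print(project_id, counts)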