Skip to content

Commit

Permalink
feat(datasets): add datasets endpoint (#111)
Browse files Browse the repository at this point in the history
  • Loading branch information
philloooo authored Feb 22, 2019
1 parent 2e325b9 commit 47a23a8
Show file tree
Hide file tree
Showing 8 changed files with 164 additions and 2 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ before_script:

# command to run tests
script:
- py.test -vv --cov=peregrine --cov-report xml tests/system_test.py tests/graphql/test_graphql.py
- py.test -vv --cov=peregrine --cov-report xml tests/system_test.py tests/graphql/test_graphql.py tests/graphql/test_datasets.py

after_script:
- python-codacy-coverage -r coverage.xml
Expand Down
24 changes: 24 additions & 0 deletions openapis/swagger.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,30 @@ tags:
- name: system
description: System endpoints
paths:
/datasets:
get:
tags:
- datasets
summary: Get counts for nodes for each project
parameters:
- in: query
name: nodes
description: comma delimited nodes to get counts for
schema:
type: string
responses:
'200':
description: node counts for each project
content:
application/json:
schema:
type: object
example: {project_A: {case: 0, aliquot: 1}, project_B: {case: 2, aliquot: 3}}
'401':
description: unauthorized request
'400':
description: invalid request

/graphql:
post:
tags:
Expand Down
3 changes: 3 additions & 0 deletions peregrine/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

import peregrine
from peregrine import dictionary
from peregrine.blueprints import datasets
from .errors import APIError, setup_default_handlers, UnhealthyCheck
from .resources import submission
from .version_data import VERSION, COMMIT, DICTVERSION, DICTCOMMIT
Expand All @@ -32,12 +33,14 @@ def app_register_blueprints(app):
app.url_map.strict_slashes = False

app.register_blueprint(peregrine.blueprints.blueprint, url_prefix=v0+'/submission')
app.register_blueprint(datasets.blueprint, url_prefix=v0+'/datasets')


def app_register_duplicate_blueprints(app):
    """
    Register the blueprints a second time directly under the root path,
    i.e. without the versioned url prefix.
    """
    # TODO: (jsm) deprecate this v0 version under root endpoint. This
    # root endpoint duplicates /v0 to allow gradual client migration
    root_mounts = (
        (peregrine.blueprints.blueprint, '/submission'),
        (datasets.blueprint, '/datasets'),
    )
    for mounted_blueprint, prefix in root_mounts:
        app.register_blueprint(mounted_blueprint, url_prefix=prefix)


def async_pool_init(app):
Expand Down
62 changes: 62 additions & 0 deletions peregrine/blueprints/datasets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import flask
import os
import re

from peregrine.resources.submission import (
graphql,
set_read_access_projects_for_public_endpoint,
set_read_access_projects,
)

from cdiserrors import UserError, AuthZError

blueprint = flask.Blueprint("datasets", "datasets")


@blueprint.route("/", methods=["GET"])
def get_datasets():
    """
    Get dataset level summary counts for each readable project.

    If a deployment sets the PUBLIC_DATASETS environment variable to
    "true", this endpoint is open to anonymous users and counts are
    reported for all projects; otherwise only the projects the current
    user has read access to are reported.

    Query params:
        nodes: comma delimited node names to get counts for,
            e.g. ``?nodes=case,aliquot``

    Returns:
        A JSON object mapping each project id to a ``{node: count}``
        object. If the GraphQL query reports errors, a JSON object with
        ``data`` and ``errors`` keys is returned with status 400.

    Raises:
        UserError: if no node is given or a node name is not a plain
            identifier.
        AuthZError: if there is no project the caller may read.
    """
    # "".split(",") yields [""], so blank entries have to be dropped
    # before checking whether any node was actually requested.
    raw_nodes = flask.request.args.get("nodes", "").split(",")
    nodes = [node.strip() for node in raw_nodes if node.strip()]
    if not nodes:
        raise UserError("Need to provide target nodes in query param")

    # Node names are interpolated into the query text below, so only
    # plain identifiers are accepted to keep callers from injecting
    # arbitrary GraphQL.
    invalid_nodes = [
        node for node in nodes if not re.match(r"[A-Za-z0-9_]+$", node)
    ]
    if invalid_nodes:
        raise UserError(
            "Invalid node name(s): {}".format(", ".join(invalid_nodes))
        )

    if os.environ.get("PUBLIC_DATASETS") == "true":
        set_read_access_projects_for_public_endpoint()
    else:
        set_read_access_projects()
    projects = flask.g.read_access_projects
    if not projects:
        raise AuthZError("You are not authorized to access any projects")
    # Results are mapped back to projects by position further down.
    projects = list(projects)

    # construct a query that gets counts for all projects
    # because graphql can't add structure to group by projects,
    # we label each count with the project index and later parse it
    # with a regex to add structure to the response
    fields = [
        'i{i}_{node}: _{node}_count(project_id: "{p}")'.format(
            i=i, node=node, p=project_id
        )
        for i, project_id in enumerate(projects)
        for node in nodes
    ]
    query = "{" + " ".join(fields) + "}"

    data, errors = graphql.execute_query(query, variables={})
    if errors:
        return flask.jsonify({"data": data, "errors": errors}), 400

    result = {project_id: {} for project_id in projects}
    # \d+ (not \d): the project index has several digits as soon as
    # ten or more projects are readable.
    alias_pattern = re.compile(r"^i(\d+)_(.*)")
    for name, value in data.items():
        match = alias_pattern.match(name)
        if match is None:
            # not one of the aliases generated above; nothing to map
            continue
        index = int(match.group(1))
        node = match.group(2)
        result[projects[index]][node] = value
    return flask.jsonify(result)
18 changes: 18 additions & 0 deletions peregrine/resources/submission/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,24 @@ def get_open_project_ids():
for program in project['programs']
]

def set_read_access_projects_for_public_endpoint():
    """
    Store the ids of all projects in ``flask.g.read_access_projects``,
    for endpoints that don't need authorization.

    Each id has the form ``<program name>-<project code>``.
    """
    db = flask.current_app.db
    with db.session_scope():
        project_ids = []
        for project in db.nodes(models.Project).all():
            for program in project['programs']:
                project_ids.append(program['name'] + '-' + project['code'])
        flask.g.read_access_projects = project_ids


def set_read_access_projects():
"""
Expand Down
2 changes: 1 addition & 1 deletion run_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@ userdatamodel-init --db test_userapi
python bin/setup_test_database.py
mkdir -p tests/resources/keys; cd tests/resources/keys; openssl genrsa -out test_private_key.pem 2048; openssl rsa -in test_private_key.pem -pubout -out test_public_key.pem; cd -

py.test -vv --cov=peregrine --cov-report xml tests/system_test.py tests/graphql/test_graphql.py
py.test -vv --cov=peregrine --cov-report xml tests/system_test.py tests/graphql/test_graphql.py tests/graphql/test_datasets.py
8 changes: 8 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,3 +238,11 @@ def es_teardown():
es.indices.refresh(index=INDEX)

json_data.close()


@pytest.fixture
def public_dataset_api(request):
    """
    Set the PUBLIC_DATASETS environment variable to "true" for the
    duration of a test.

    The previous state of the variable is restored afterwards (the
    variable is removed again if it was not set before), so the fixture
    does not leak configuration into other tests.
    """
    previous = os.environ.get("PUBLIC_DATASETS")
    os.environ["PUBLIC_DATASETS"] = "true"

    def tearDown():
        if previous is None:
            os.environ.pop("PUBLIC_DATASETS", None)
        else:
            os.environ["PUBLIC_DATASETS"] = previous

    request.addfinalizer(tearDown)
47 changes: 47 additions & 0 deletions tests/graphql/test_datasets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
from test_graphql import post_example_entities_together
from datamodelutils import models
import os


def test_authorized_call_with_protected_config(
    client, submitter, pg_driver_clean, cgci_blgsp
):
    """
    An authenticated request is expected to report counts for
    CGCI-BLGSP only, excluding cases moved to another project.
    """
    post_example_entities_together(client, pg_driver_clean, submitter)
    with pg_driver_clean.session_scope() as s:
        cases = pg_driver_clean.nodes(models.Case).all()
        case_count = len(cases)
        # Re-assign all but the last three cases to a different project;
        # the expected count below is derived from this slice instead of
        # assuming a fixed number of example cases.
        moved_cases = cases[0:-3]
        for case in moved_cases:
            case.project_id = "OTHER-OTHER"
            s.merge(case)
    r = client.get("/datasets?nodes=case,aliquot", headers=submitter)
    # list(...) keeps the comparison valid where keys() returns a view
    assert list(r.json.keys()) == ["CGCI-BLGSP"]
    assert r.json["CGCI-BLGSP"]["case"] == case_count - len(moved_cases)


def test_anonymous_call_with_protected_config(client, pg_driver_clean, cgci_blgsp):
    """A request sent without auth headers must be answered with 401."""
    response = client.get("/datasets?nodes=case,aliquot")
    status = response.status_code
    assert status == 401


def test_anonymous_call_with_public_config(
    client, submitter, pg_driver_clean, cgci_blgsp, public_dataset_api
):
    """
    With the public dataset fixture active, a request without auth
    headers gets per-project counts for both CGCI-BLGSP and CGCI-OTHER.
    """
    post_example_entities_together(client, pg_driver_clean, submitter)
    with pg_driver_clean.session_scope() as s:
        # Add a second project under the CGCI program
        project = models.Project("other", code="OTHER")
        program = pg_driver_clean.nodes(models.Program).props(name="CGCI").first()
        project.programs = [program]
        s.add(project)
        aliquot_count = pg_driver_clean.nodes(models.Aliquot).count()
        cases = pg_driver_clean.nodes(models.Case).all()
        case_count = len(cases)
        # Re-assign all but the last three cases to the new project; the
        # expected counts below are derived from this slice instead of
        # assuming a fixed number of example cases.
        moved_cases = cases[0:-3]
        for case in moved_cases:
            case.project_id = "CGCI-OTHER"
            s.merge(case)

    r = client.get("/datasets?nodes=case,aliquot")
    assert r.json["CGCI-BLGSP"]["case"] == case_count - len(moved_cases)
    assert r.json["CGCI-BLGSP"]["aliquot"] == aliquot_count
    assert r.json["CGCI-OTHER"]["aliquot"] == 0
    assert r.json["CGCI-OTHER"]["case"] == len(moved_cases)

0 comments on commit 47a23a8

Please sign in to comment.