From a2938002da2dbee2ac10e74901c46b141abf10ba Mon Sep 17 00:00:00 2001 From: Colleen Xu Date: Wed, 18 Sep 2024 17:09:07 -0700 Subject: [PATCH 1/4] mychem: remove chembl-treats operations Translator wants us to use Multiomics Clinical Trials KP (CTKP) instead --- mychem.info/openapi_full.yml | 94 +++--------------------------------- 1 file changed, 7 insertions(+), 87 deletions(-) diff --git a/mychem.info/openapi_full.yml b/mychem.info/openapi_full.yml index ab4eff7..c752835 100644 --- a/mychem.info/openapi_full.yml +++ b/mychem.info/openapi_full.yml @@ -331,8 +331,6 @@ paths: - "$ref": "#/components/x-bte-kgs-operations/drugMechChemblEnsembl-rev" - "$ref": "#/components/x-bte-kgs-operations/drugMechChemblUniprot" - "$ref": "#/components/x-bte-kgs-operations/drugMechChemblUniprot-rev" - - "$ref": "#/components/x-bte-kgs-operations/treatsChembl" - - "$ref": "#/components/x-bte-kgs-operations/treatsChembl-rev" - "$ref": "#/components/x-bte-kgs-operations/drugcentral-bioactivity-activator" - "$ref": "#/components/x-bte-kgs-operations/drugcentral-bioactivity-agonist" - "$ref": "#/components/x-bte-kgs-operations/drugcentral-bioactivity-antagonist" @@ -663,20 +661,6 @@ components: ref_pmcid: chembl.drug_mechanisms.mechanism_refs.PMC ## no prefix (but IDs start with "PMC") ref_url: chembl.drug_mechanisms.mechanism_refs.url output_name: chembl.drug_mechanisms.target_name - chembl-treats: - MESH: chembl.drug_indications.mesh_id - ref_clinicaltrials: chembl.drug_indications.indication_refs.ClinicalTrials ## no prefix - ## this url field will include expanded clinicaltrials (aka duplicates!) - ## but is required for other references: ATC, DailyMed, FDA - ref_url: chembl.drug_indications.indication_refs.url - "biolink:max_research_phase": chembl.drug_indications.max_phase_for_ind - ## commenting out because data-processing / biolink-modeling issues - # year_first_approved: chembl.drug_indications.first_approval - chembl-treats-rev: - "CHEMBL.COMPOUND": chembl.molecule_chembl_id ## no prefix - ref_clinicaltrials: chembl.drug_indications.indication_refs.ClinicalTrials ## no prefix - ref_url: chembl.drug_indications.indication_refs.url - "biolink:max_research_phase": chembl.drug_indications.max_phase_for_ind drugcentral-bioactivity: UniProtKB: drugcentral.bioactivity.uniprot.uniprot_id ## no prefix ## commenting out because data-processing / biolink-modeling issues @@ -897,12 +881,13 @@ components: testExamples: - qInput: "RHEA:23312" ## 2 sucrose = 1(F)-beta-D-fructosylsucrose + D-glucose oneOutput: "CHEBI:16885" ## 1-kestose / 1(F)-beta-D-fructosylsucrose aka PUBCHEM.COMPOUND:440080 - ## chembl - ## knowledge_level / agent_type: assuming manual curation. - ## - drug_mechanisms: - ## - can filter for human vs non-human targets and by target-type - ## - other target_organism values: Bacteria, Human respiratory syncytial virus, Fungi, Human herpesvirus 1, Hepatitis C virus, - ## Human immunodeficiency virus 1, Varicella-zoster virus (strain Oka vaccine) (HHV-3) (Human herpesvirus 3) + ## chembl + ## knowledge_level / agent_type: assuming manual curation. + ## - not including drug_indications. Translator wants us to use Multiomics Clinical Trials KP (CTKP) instead + ## - drug_mechanisms: + ## - can filter for human vs non-human targets and by target-type + ## - other target_organism values: Bacteria, Human respiratory syncytial virus, Fungi, Human herpesvirus 1, Hepatitis C virus, + ## Human immunodeficiency virus 1, Varicella-zoster virus (strain Oka vaccine) (HHV-3) (Human herpesvirus 3) drugMechChemblEnsembl: ## 60 chem/records have chembl.drug_mechanisms.target_components.ensembl_gene - supportBatch: true @@ -1047,71 +1032,6 @@ components: testExamples: - qInput: "UniProtKB:Q16539" ## MAP kinase p38 alpha oneOutput: "CHEMBL.COMPOUND:CHEMBL585902" ## AMG-548 aka PUBCHEM.COMPOUND:11167112 - treatsChembl: - ## all drug_indication records have a mesh ID for the indication... - - supportBatch: true - useTemplating: true - inputs: - - id: CHEMBL.COMPOUND - semantic: SmallMolecule - requestBody: - body: - q: "{{ queryInputs }}" ## no prefix - scopes: chembl.molecule_chembl_id - outputs: - - id: MESH - semantic: Disease - parameters: - ## not including: mesh_heading (name) - ## including: first_approval is only in some records (not indexed right now) - ## commenting out because data-processing / biolink-modeling issues - # fields: >- - # chembl.drug_indications.mesh_id, - # chembl.drug_indications.max_phase_for_ind, - # chembl.drug_indications.indication_refs.url, - # chembl.drug_indications.first_approval - fields: >- - chembl.drug_indications.mesh_id, - chembl.drug_indications.indication_refs.ClinicalTrials, - chembl.drug_indications.indication_refs.url, - chembl.drug_indications.max_phase_for_ind - size: 1000 ## note size limit; added just in case - predicate: in_clinical_trials_for - source: "infores:chembl" - knowledge_level: knowledge_assertion - agent_type: manual_agent - response_mapping: - "$ref": "#/components/x-bte-response-mapping/chembl-treats" - testExamples: - - qInput: "CHEMBL.COMPOUND:CHEMBL471737" ## IVABRADINE aka PUBCHEM.COMPOUND:132999 - oneOutput: "MESH:D000072658" ## Non-ST Elevated Myocardial Infarction aka UMLS:C4255010 - treatsChembl-rev: - - supportBatch: false - useTemplating: true - inputs: - - id: MESH - semantic: Disease - requestBody: - body: - q: "{{ queryInputs }}" ## no prefix - scopes: chembl.drug_indications.mesh_id - outputs: - - id: CHEMBL.COMPOUND - semantic: SmallMolecule - parameters: - ## need the whole thing for jmespath to work - fields: chembl.molecule_chembl_id,chembl.drug_indications ## no prefix - size: 1000 ## note size limit - jmespath: "chembl.drug_indications|[?mesh_id=='{{ queryInputs }}']" - predicate: tested_by_clinical_trials_of - source: "infores:chembl" - knowledge_level: knowledge_assertion - agent_type: manual_agent - response_mapping: - "$ref": "#/components/x-bte-response-mapping/chembl-treats-rev" - testExamples: - - qInput: "MESH:D014376" ## Tuberculosis aka MONDO:0018076 - oneOutput: "CHEMBL.COMPOUND:CHEMBL1441" ## ETHIONAMIDE aka PUBCHEM.COMPOUND:2761171 ## DrugCentral source ## Not annotating: ## - drugcentral.approval (node properties?) From e4153d544863f525ee3f54b5310658071adf696c Mon Sep 17 00:00:00 2001 From: Colleen Xu Date: Wed, 18 Sep 2024 17:09:53 -0700 Subject: [PATCH 2/4] repodb: remove non-approved-treatment operations Translator wants us to use Multiomics Clinical Trials KP (CTKP) instead --- repodb/smartapi.yaml | 98 ++------------------------------------------ 1 file changed, 3 insertions(+), 95 deletions(-) diff --git a/repodb/smartapi.yaml b/repodb/smartapi.yaml index 7b85738..a2188a7 100644 --- a/repodb/smartapi.yaml +++ b/repodb/smartapi.yaml @@ -313,12 +313,10 @@ paths: # summary: Make batch gene queries and return matching gene hits tags: - query - ## 4 operations (2 sets) + ## 2 operations (1 set) x-bte-kgs-operations: - $ref: '#/components/x-bte-kgs-operations/drug-disease-approved' - $ref: '#/components/x-bte-kgs-operations/disease-drug-approved' - - $ref: '#/components/x-bte-kgs-operations/drug-disease-not-approved' - - $ref: '#/components/x-bte-kgs-operations/disease-drug-not-approved' components: parameters: callback: @@ -580,9 +578,10 @@ components: # - type: string x-bte-kgs-operations: ## NOTES + ## - not annotating non-approved drug info: this is from parsing/cleaning clinicaltrials.gov data. + ## Translator wants us to use Multiomics Clinical Trials KP (CTKP) instead ## - knowledge_level / agent_type: based on paper https://www.nature.com/articles/sdata201729 ## - approved drug info from downloading drugcentral / drugbank - ## - non-approved drug info from data parsing/cleaning clinicaltrials.gov data ## - possible edge-attributes: ## - 2185 / 10602 records have detailed status (free-text): https://biothings.ncats.io/repodb/query?q=_exists_:trials.detailed_status ## - status: all records have this, but the values need to be mapped to biolink-model ClinicalApprovalStatusEnum @@ -648,103 +647,12 @@ components: testExamples: - qInput: "UMLS:C0032797" ## Postpartum Hemorrhage / HP:0011891 Post-partum hemorrhage oneOutput: "DRUGBANK:DB00107" ## Oxytocin / PUBCHEM.COMPOUND:439302 - drug-disease-not-approved: - ## 2583 records (going off of existence of trials.phase which doesn't seem to exist in approved entries) - ## biothings.ncats.io/repodb/query?q=_exists_:trials.phase - ## phase values are either Phase 1, Phase 2, Phase 3 (so all went through clinical trials) - ## status values are either terminated, withdrawn, suspended - - supportBatch: true - useTemplating: true ## flag to say templating is being used below - inputs: - - id: DRUGBANK - semantic: SmallMolecule - requestBody: - body: - q: "{{ queryInputs | rmPrefix() }}" - scopes: drug.drugbank - outputs: - - id: UMLS - semantic: Disease - parameters: - ## include clinical trial field since some will have it - fields: >- - indication.umls, - drug.name, - indication.name, - trials.id, - trials.status - size: 1000 - filter: _exists_:trials.phase - predicate: in_clinical_trials_for - source: "infores:repodb" - knowledge_level: observation - agent_type: automated_agent - response_mapping: - "$ref": "#/components/x-bte-response-mapping/disease-clinicaltrial" - testExamples: - ## for Terminated - - qInput: "DRUGBANK:DB00043" ## Omalizumab / UNII:2P471X1Z11 - oneOutput: "UMLS:C0004096" ## Asthma / MONDO:0004979 - ## for Withdrawn - - qInput: "DRUGBANK:DB14482" ## Sodium ascorbate / PUBCHEM.COMPOUND:23667548 - oneOutput: "UMLS:C0004238" ## Atrial Fibrillation / MONDO:0004981 - ## for Suspended - - qInput: "DRUGBANK:DB00104" ## Octreotide / PUBCHEM.COMPOUND:448601 - oneOutput: "UMLS:C0001206" ## Acromegaly / MONDO:0019933 - disease-drug-not-approved: - - supportBatch: true - useTemplating: true - inputs: - - id: UMLS - semantic: Disease - requestBody: - body: - q: "{{ queryInputs | rmPrefix() }}" - scopes: indication.umls - outputs: - - id: DRUGBANK - semantic: SmallMolecule - parameters: - fields: >- - drug.drugbank, - drug.name, - indication.name, - trials.id, - trials.status - size: 1000 - filter: _exists_:trials.phase - predicate: tested_by_clinical_trials_of - source: "infores:repodb" - knowledge_level: observation - agent_type: automated_agent - response_mapping: - "$ref": "#/components/x-bte-response-mapping/drug-clinicaltrial" - testExamples: - ## for Terminated - - qInput: "UMLS:C0184567" ## Acute onset pain / NCIT:C27003 - oneOutput: "DRUGBANK:DB14123" ## Menthol / PUBCHEM.COMPOUND:1254 - ## for Withdrawn - - qInput: "UMLS:C0022104" ## Irritable Bowel Syndrome / MONDO:0005052 - oneOutput: "DRUGBANK:DB14005" ## Dimethicone 410 / PUBCHEM.COMPOUND:131740050 Bis(tert-butylsilyloxy)-dimethylsilane - ## for Suspended - - qInput: "UMLS:C0406317" ## Chronic small plaque psoriasis - oneOutput: "DRUGBANK:DB14006" ## Choline salicylate / PUBCHEM.COMPOUND:54686350 x-bte-response-mapping: disease-only: UMLS: indication.umls input_name: drug.name output_name: indication.name - disease-clinicaltrial: - UMLS: indication.umls - input_name: drug.name - output_name: indication.name - ref_clinicaltrials: trials.id ## no prefix drug-only: DRUGBANK: drug.drugbank input_name: indication.name output_name: drug.name - drug-clinicaltrial: - DRUGBANK: drug.drugbank - input_name: indication.name - output_name: drug.name - ref_clinicaltrials: trials.id ## no prefix From 871a552aa4e0aa915a3951a13581f1e58bde4b21 Mon Sep 17 00:00:00 2001 From: Colleen Xu Date: Thu, 19 Dec 2024 23:35:45 -0800 Subject: [PATCH 3/4] mychem: adjust comment indent for chembl info --- mychem.info/openapi_full.yml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/mychem.info/openapi_full.yml b/mychem.info/openapi_full.yml index bc30fa2..e6ce8a7 100644 --- a/mychem.info/openapi_full.yml +++ b/mychem.info/openapi_full.yml @@ -892,14 +892,14 @@ components: testExamples: - qInput: "RHEA:23312" ## 2 sucrose = 1(F)-beta-D-fructosylsucrose + D-glucose oneOutput: "CHEBI:16885" ## 1-kestose / 1(F)-beta-D-fructosylsucrose aka PUBCHEM.COMPOUND:440080 - ## chembl - ## knowledge_level / agent_type: assuming manual curation. - ## - not including drug_indications. Translator wants us to use Multiomics Clinical Trials KP (CTKP) instead - ## - drug_mechanisms: - ## - Gene field: always ensembl_id or uniprot, never both. Not sure if any entity-merging was ever done - ## - can filter for human vs non-human targets and by target-type - ## - other target_organism values: Bacteria, Human respiratory syncytial virus, Fungi, Human herpesvirus 1, Hepatitis C virus, - ## Human immunodeficiency virus 1, Varicella-zoster virus (strain Oka vaccine) (HHV-3) (Human herpesvirus 3) + ## chembl + ## knowledge_level / agent_type: assuming manual curation. + ## - not including drug_indications. Translator wants us to use Multiomics Clinical Trials KP (CTKP) instead + ## - drug_mechanisms: + ## - Gene field: always ensembl_id or uniprot, never both. Not sure if any entity-merging was ever done + ## - can filter for human vs non-human targets and by target-type + ## - other target_organism values: Bacteria, Human respiratory syncytial virus, Fungi, Human herpesvirus 1, Hepatitis C virus, + ## Human immunodeficiency virus 1, Varicella-zoster virus (strain Oka vaccine) (HHV-3) (Human herpesvirus 3) drugMechChemblEnsembl: ## 74 chem/records have chembl.drug_mechanisms.target_components.ensembl_gene https://mychem.info/v1/query?q=_exists_:chembl.drug_mechanisms.target_components.ensembl_gene&fields=chembl - supportBatch: true From 64414937cae14ef9abb17fbf6ae0590d5c745a65 Mon Sep 17 00:00:00 2001 From: Colleen Xu Date: Fri, 27 Dec 2024 23:56:16 -0800 Subject: [PATCH 4/4] mychem: remove aeolusTreats operations --- mychem.info/openapi_full.yml | 80 ++++-------------------------------- 1 file changed, 7 insertions(+), 73 deletions(-) diff --git a/mychem.info/openapi_full.yml b/mychem.info/openapi_full.yml index 22ef3a4..91ae3e4 100644 --- a/mychem.info/openapi_full.yml +++ b/mychem.info/openapi_full.yml @@ -321,8 +321,6 @@ paths: tags: - query x-bte-kgs-operations: - - "$ref": "#/components/x-bte-kgs-operations/aeolusTreats" - - "$ref": "#/components/x-bte-kgs-operations/aeolusTreats-rev" - "$ref": "#/components/x-bte-kgs-operations/chebiToReactome" - "$ref": "#/components/x-bte-kgs-operations/chebiToReactome-rev" - "$ref": "#/components/x-bte-kgs-operations/chebiToRhea" @@ -627,12 +625,6 @@ components: # type: array # - type: string x-bte-response-mapping: - aeolusIndication-meddra: - MEDDRA: aeolus.indications.meddra_code - "biolink:evidence_count": aeolus.indications.count - aeolusIndication-unii: - UNII: aeolus.unii - "biolink:evidence_count": aeolus.indications.count chebiXrefs-reactome: REACT: chebi.xrefs.reactome chebi: @@ -715,73 +707,15 @@ components: fdaOrphanDrug-pubchemCompound: "PUBCHEM.COMPOUND": fda_orphan_drug.pubchem_cid ## no prefix x-bte-kgs-operations: - ## deciding not to annotate aeolus.outcomes since this list of side effects can be very long + ## AEOLUS: + ## - not annotating indications because Translator CQS/Matt Brush team asked us not to; + ## asked us to use Multiomics Drug Approvals instead + ## - deciding not to annotate aeolus.outcomes since this list of side effects can be very long ## this says mychem truncates it to 5000...https://docs.mychem.info/en/latest/doc/data_source.html - ## deciding not to annotate sider since it's not clear how the records are structured (is meddra ID for the side effect?) + ## SIDER: deciding not to annotate since it's not clear how the records are structured (is meddra ID for the side effect?) ## it is also long...https://docs.mychem.info/en/latest/doc/data_source.html - aeolusTreats: - ## - chose to map to Disease semantic type since I saw SRI map some MEDDRA IDs to Disease... - ## https://nodenormalization-sri.renci.org/1.2/get_normalized_nodes?curie=MEDDRA%3A10021639&conflate=true - ## But some seem like PhenotypicFeatures... - - supportBatch: true - useTemplating: true - inputs: - - id: UNII - semantic: SmallMolecule - requestBody: - body: - q: "{{ queryInputs }}" ## no prefix - scopes: aeolus.unii - ## all 1551 records have unii and rxcui (another option), only 1016 have inchikey - outputs: - - id: MEDDRA - semantic: Disease - parameters: - filter: _exists_:aeolus.indications ## need the field to exist for jmespath to work - fields: aeolus.indications ## need the whole thing for jmespath to work - jmespath: aeolus.indications|[?count>`20`] - ## don't need always_list because aeolus.indications can be a single-element array - size: 1000 ## note size limit; added just in case - ## seem to come from the drug adverse event self-reporting - predicate: applied_to_treat - source: "infores:aeolus" - knowledge_level: observation - agent_type: manual_agent - response_mapping: - "$ref": "#/components/x-bte-response-mapping/aeolusIndication-meddra" - testExamples: - - qInput: "UNII:90347YTW5F" ## alfuzosin aka PUBCHEM.COMPOUND:2092 - oneOutput: "MEDDRA:10004446" ## Benign prostatic hyperplasia aka MONDO:0010811 - aeolusTreats-rev: - - supportBatch: false - useTemplating: true - inputs: - - id: MEDDRA - semantic: Disease - requestBody: - body: - q: "{{ queryInputs }}" ## no prefix - scopes: aeolus.indications.meddra_code - outputs: - - id: UNII - semantic: SmallMolecule - parameters: - filter: _exists_:aeolus.indications ## need the field to exist for jmespath to work - ## need the whole thing for jmespath to work - fields: aeolus.unii,aeolus.indications ## no prefix - size: 1000 ## note size limit - jmespath: "aeolus.indications|[?count>`20` && meddra_code=='{{ queryInputs }}']" - jmespath_exclude_empty: true - predicate: treatment_applications_from - source: "infores:aeolus" - knowledge_level: observation - agent_type: manual_agent - response_mapping: - "$ref": "#/components/x-bte-response-mapping/aeolusIndication-unii" - testExamples: - - qInput: "MEDDRA:10012378" ## Depression aka MONDO:0002050 - oneOutput: "UNII:82VFR53I78" ## Aripiprazole aka PUBCHEM.COMPOUND:60795 - ## chebi xrefs actually include relationships to other bioentities... + ## CHEBI: + ## - chebi xrefs actually include relationships to other bioentities... ## - note that truncation was done by MyChem for rhea... ## see https://docs.mychem.info/en/latest/doc/data_source.html ## - not including chebi.xrefs.uniprot because: