diff --git a/.github/workflows/hail-search-unit-tests.yaml b/.github/workflows/hail-search-unit-tests.yaml index 4d12c8b647..0a493d8f21 100644 --- a/.github/workflows/hail-search-unit-tests.yaml +++ b/.github/workflows/hail-search-unit-tests.yaml @@ -27,7 +27,8 @@ jobs: pip install -r hail_search/requirements-test.txt - name: Run coverage tests run: | - export DATASETS_DIR=./hail_search/fixtures + export HAIL_SEARCH_DATA_DIR=./hail_search/fixtures + export REFERENCE_DATASETS_DIR=./hail_search/fixtures export MAX_GENE_INTERVALS=3 export MACHINE_MEM=24 export JAVA_OPTS_XSS=16M diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index 5af0b840fc..eb75d6a294 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -9,8 +9,8 @@ HAS_ALT, HAS_REF,INHERITANCE_FILTERS, PATH_FREQ_OVERRIDE_CUTOFF, MALE, RECESSIVE, REF_ALT, REF_REF, MAX_LOAD_INTERVALS, \ UNAFFECTED_ID, X_LINKED_RECESSIVE, XPOS, OMIM_SORT, FAMILY_GUID_FIELD, GENOTYPES_FIELD, AFFECTED_ID_MAP -DATASETS_DIR = os.environ.get('DATASETS_DIR', '/hail_datasets') -SSD_DATASETS_DIR = os.environ.get('SSD_DATASETS_DIR', DATASETS_DIR) +HAIL_SEARCH_DATA_DIR = os.environ.get('HAIL_SEARCH_DATA_DIR', '/seqr/seqr-hail-search-data') +IN_MEMORY_DIR = os.environ.get('IN_MEMORY_DIR', HAIL_SEARCH_DATA_DIR) # Number of filtered genes at which pre-filtering a table by gene-intervals does not improve performance # Estimated based on behavior for several representative gene lists @@ -264,11 +264,15 @@ def _load_filtered_table(self, sample_data, intervals=None, annotations=None, an *self._parse_sample_data(sample_data), parsed_intervals=parsed_intervals, raw_intervals=intervals, parsed_annotations=parsed_annotations, **kwargs) @classmethod - def _get_table_path(cls, path, use_ssd_dir=False): - return f'{SSD_DATASETS_DIR if use_ssd_dir else DATASETS_DIR}/{cls.GENOME_VERSION}/{cls.DATA_TYPE}/{path}' + def _get_table_path(cls, path): + return f'{cls._get_table_dir(path)}/{cls.GENOME_VERSION}/{cls.DATA_TYPE}/{path}' - def _read_table(self, path, drop_globals=None, use_ssd_dir=False, skip_missing_field=None): - table_path = self._get_table_path(path, use_ssd_dir=use_ssd_dir) + @classmethod + def _get_table_dir(cls, path): + return IN_MEMORY_DIR if path == 'annotations.ht' else HAIL_SEARCH_DATA_DIR + + def _read_table(self, path, drop_globals=None, skip_missing_field=None): + table_path = self._get_table_path(path) if 'variant_ht' in self._load_table_kwargs: ht = self._query_table_annotations(self._load_table_kwargs['variant_ht'], table_path) if skip_missing_field and not ht.any(hl.is_defined(ht[skip_missing_field])): @@ -303,7 +307,7 @@ def _load_filtered_project_hts(self, project_samples, skip_all_missing=False, n_ # for variant search, project_samples looks like # {: {: {: [, , ...]}, : {: []} ...}, : ...} sample_type = list(project_samples[project_guid].keys())[0] - project_ht = self._read_table(f'projects/{sample_type}/{project_guid}.ht', use_ssd_dir=True) + project_ht = self._read_table(f'projects/{sample_type}/{project_guid}.ht') return self._filter_entries_table(project_ht, project_samples[project_guid][sample_type], **kwargs) # Need to chunk tables or else evaluating table globals throws LineTooLong exception @@ -316,7 +320,7 @@ def _load_filtered_project_hts(self, project_samples, skip_all_missing=False, n_ sample_data = {} for project_guid, project_sample_data in project_samples.items(): sample_type = list(project_sample_data.keys())[0] - project_ht = self._read_table(f'projects/{sample_type}/{project_guid}.ht', use_ssd_dir=True) + project_ht = self._read_table(f'projects/{sample_type}/{project_guid}.ht') if project_ht is None: continue @@ -344,7 +348,7 @@ def import_filtered_table(self, project_samples, num_families, **kwargs): family_sample_data = list(project_samples.values())[0] sample_type = list(family_sample_data.keys())[0] family_guid = list(family_sample_data[sample_type].keys())[0] - family_ht = self._read_table(f'families/{sample_type}/{family_guid}.ht', use_ssd_dir=True) + family_ht = self._read_table(f'families/{sample_type}/{family_guid}.ht') family_ht = family_ht.transmute(family_entries=[family_ht.entries]) family_ht = family_ht.annotate_globals( family_guids=[family_guid], family_samples={family_guid: family_ht.sample_ids}, diff --git a/hail_search/queries/mito.py b/hail_search/queries/mito.py index 385a2c2328..999dc4dc25 100644 --- a/hail_search/queries/mito.py +++ b/hail_search/queries/mito.py @@ -3,12 +3,14 @@ from aiohttp.web import HTTPNotFound import hail as hl import logging +import os from hail_search.constants import ABSENT_PATH_SORT_OFFSET, CLINVAR_KEY, CLINVAR_MITO_KEY, CLINVAR_LIKELY_PATH_FILTER, CLINVAR_PATH_FILTER, \ CLINVAR_PATH_RANGES, CLINVAR_PATH_SIGNIFICANCES, ALLOWED_TRANSCRIPTS, ALLOWED_SECONDARY_TRANSCRIPTS, PATHOGENICTY_SORT_KEY, CONSEQUENCE_SORT, \ PATHOGENICTY_HGMD_SORT_KEY, MAX_LOAD_INTERVALS from hail_search.queries.base import BaseHailTableQuery, PredictionPath, QualityFilterFormat +REFERENCE_DATASETS_DIR = os.environ.get('REFERENCE_DATASETS_DIR', '/seqr/seqr-reference-data') logger = logging.getLogger(__name__) @@ -111,6 +113,10 @@ class MitoHailTableQuery(BaseHailTableQuery): } SORTS[PATHOGENICTY_HGMD_SORT_KEY] = SORTS[PATHOGENICTY_SORT_KEY] + PREFILTER_TABLES = { + CLINVAR_KEY: 'clinvar_path_variants.ht', + } + @staticmethod def _selected_main_transcript_expr(ht): comp_het_gene_ids = getattr(ht, 'comp_het_gene_ids', None) @@ -159,22 +165,28 @@ def _get_family_passes_quality_filter(self, quality_filter, ht, pathogenicity=No return lambda entries: hl.is_defined(clinvar_path_ht[ht.key]) | passes_quality(entries) - def _get_loaded_filter_ht(self, key, table_path, get_filters, **kwargs): + def _get_loaded_filter_ht(self, key, get_filters, **kwargs): if self._filter_hts.get(key) is None: ht_filter = get_filters(**kwargs) if ht_filter is False: self._filter_hts[key] = False else: - ht = self._read_table(table_path) + ht = self._read_table(self.PREFILTER_TABLES[key]) if ht_filter is not True: ht = ht.filter(ht_filter(ht)) self._filter_hts[key] = ht return self._filter_hts[key] + @classmethod + def _get_table_dir(cls, path): + if path in cls.PREFILTER_TABLES.values(): + return REFERENCE_DATASETS_DIR + return super()._get_table_dir(path) + def _get_loaded_clinvar_prefilter_ht(self, pathogenicity): return self._get_loaded_filter_ht( - CLINVAR_KEY, 'clinvar_path_variants.ht', self._get_clinvar_prefilter, pathogenicity=pathogenicity) + CLINVAR_KEY, self._get_clinvar_prefilter, pathogenicity=pathogenicity) def _get_clinvar_prefilter(self, pathogenicity=None): clinvar_path_filters = self._get_clinvar_path_filters(pathogenicity) @@ -315,7 +327,7 @@ def _gene_rank_sort(cls, r, gene_ranks): def _add_project_lookup_data(self, ht, annotation_fields, *args, **kwargs): # Get all the project-families for the looked up variant formatted as a dict of dicts: # {: {: {: True}, : {: True}}, : ...} - lookup_ht = self._read_table('lookup.ht', use_ssd_dir=True, skip_missing_field='project_stats') + lookup_ht = self._read_table('lookup.ht', skip_missing_field='project_stats') if lookup_ht is None: raise HTTPNotFound() variant_projects = lookup_ht.aggregate(hl.agg.take( diff --git a/hail_search/queries/snv_indel_37.py b/hail_search/queries/snv_indel_37.py index bebb02eab9..6da66fdd7a 100644 --- a/hail_search/queries/snv_indel_37.py +++ b/hail_search/queries/snv_indel_37.py @@ -73,13 +73,18 @@ class SnvIndelHailTableQuery37(MitoHailTableQuery): ('is_gt_10_percent', 0.1), ]) + PREFILTER_TABLES = { + **MitoHailTableQuery.PREFILTER_TABLES, + GNOMAD_GENOMES_FIELD: 'high_af_variants.ht', + } + def _prefilter_entries_table(self, ht, *args, raw_intervals=None, **kwargs): ht = super()._prefilter_entries_table(ht, *args, **kwargs) load_table_intervals = self._load_table_kwargs.get('_intervals') or [] no_interval_prefilter = not load_table_intervals or len(raw_intervals or []) > len(load_table_intervals) if 'variant_ht' not in self._load_table_kwargs and no_interval_prefilter: af_ht = self._get_loaded_filter_ht( - GNOMAD_GENOMES_FIELD, 'high_af_variants.ht', self._get_gnomad_af_prefilter, **kwargs) + GNOMAD_GENOMES_FIELD, self._get_gnomad_af_prefilter, **kwargs) if af_ht: ht = ht.filter(hl.is_missing(af_ht[ht.key])) return ht diff --git a/ui/package-lock.json b/ui/package-lock.json index 19985ffb2c..a96ea05d9a 100644 --- a/ui/package-lock.json +++ b/ui/package-lock.json @@ -13713,9 +13713,9 @@ "dev": true }, "node_modules/path-to-regexp": { - "version": "1.7.0", - "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-1.7.0.tgz", - "integrity": "sha1-Wf3g9DW62suhA6hOnTvGTpa5k30=", + "version": "1.9.0", + "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-1.9.0.tgz", + "integrity": "sha512-xIp7/apCFJuUHdDLWe8O1HIkb0kQrOMb/0u6FXQjemHn/ii5LrIzU6bdECnsiTF/GjZkMEKg1xdiZwNqDYlZ6g==", "dependencies": { "isarray": "0.0.1" } @@ -30179,9 +30179,9 @@ "dev": true }, "path-to-regexp": { - "version": "1.7.0", - "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-1.7.0.tgz", - "integrity": "sha1-Wf3g9DW62suhA6hOnTvGTpa5k30=", + "version": "1.9.0", + "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-1.9.0.tgz", + "integrity": "sha512-xIp7/apCFJuUHdDLWe8O1HIkb0kQrOMb/0u6FXQjemHn/ii5LrIzU6bdECnsiTF/GjZkMEKg1xdiZwNqDYlZ6g==", "requires": { "isarray": "0.0.1" } diff --git a/ui/pages/Project/components/FamilyTable/IndividualRow.jsx b/ui/pages/Project/components/FamilyTable/IndividualRow.jsx index 0d70e51b35..c650625e85 100644 --- a/ui/pages/Project/components/FamilyTable/IndividualRow.jsx +++ b/ui/pages/Project/components/FamilyTable/IndividualRow.jsx @@ -462,6 +462,14 @@ const NON_CASE_REVIEW_FIELDS = [ { value: 'CF', text: 'UBERON:0001359 (cerebrospinal fluid)' }, { value: 'U', text: 'UBERON:0001088 (urine)' }, { value: 'NE', text: 'UBERON:0019306 (nose epithelium)' }, + { value: 'EM', text: 'UBERON:0005291 (embryonic tissue)' }, + { value: 'CE', text: 'UBERON:0002037 (cerebellum tissue)' }, + { value: 'CA', text: 'UBERON:0001133 (cardiac tissue)' }, + { value: 'IP', text: 'CL:0000034 (iPSC)' }, + { value: 'NP', text: 'CL:0011020 (iPSC NPC)' }, + { value: 'MO', text: 'CL:0000576 (monocytes - PBMCs)' }, + { value: 'LY', text: 'CL:0000542 (lymphocytes - LCLs)' }, + { value: 'FI', text: 'CL:0000057 (fibroblasts)' }, ], }, { diff --git a/ui/pages/SummaryData/components/VariantLookup.jsx b/ui/pages/SummaryData/components/VariantLookup.jsx index c67d8a7ffc..9e6ad7e00e 100644 --- a/ui/pages/SummaryData/components/VariantLookup.jsx +++ b/ui/pages/SummaryData/components/VariantLookup.jsx @@ -16,7 +16,7 @@ import Variants, { Variant, StyledVariantRow } from 'shared/components/panel/var import { FamilyVariantIndividuals } from 'shared/components/panel/variants/VariantIndividuals' import { GENOME_VERSION_FIELD } from 'shared/utils/constants' import { sendVlmContactEmail } from '../reducers' -import { geVlmDefaultContactEmailByFamily } from '../selectors' +import { getVlmDefaultContactEmails, getVlmFamiliesByContactEmail } from '../selectors' const FIELDS = [ { @@ -37,57 +37,99 @@ const FIELDS = [ { required: true, ...GENOME_VERSION_FIELD }, ] -const mapContactStateToProps = (state, ownProps) => { - const defaultEmail = geVlmDefaultContactEmailByFamily(state, ownProps)[ownProps.familyGuid] - const disabled = !defaultEmail?.to - return { - defaultEmail, - disabled, - buttonText: disabled ? 'Contact Opted Out' : null, - modalId: ownProps.familyGuid, - } -} - const mapContactDispatchToProps = { onSubmit: sendVlmContactEmail, } -const ContactButton = connect(mapContactStateToProps, mapContactDispatchToProps)(SendEmailButton) +const ContactButton = connect(null, mapContactDispatchToProps)(SendEmailButton) -const LookupFamily = ({ familyGuid, variant, reads, showReads }) => ( +const LookupFamilyLayout = ({ topContent, bottomContent, children, ...buttonProps }) => ( - - + {topContent} + + - - - {showReads} + {children} - {reads} + {bottomContent} ) -LookupFamily.propTypes = { +LookupFamilyLayout.propTypes = { + topContent: PropTypes.node, + bottomContent: PropTypes.node, + children: PropTypes.node, +} + +const InternalFamily = ({ familyGuid, variant, reads, showReads }) => ( + + + + )} + bottomContent={{reads}} + > + + {showReads} + +) + +InternalFamily.propTypes = { familyGuid: PropTypes.string.isRequired, variant: PropTypes.object.isRequired, reads: PropTypes.object, showReads: PropTypes.object, } -const LookupVariant = ({ variant }) => ( - - - {variant.lookupFamilyGuids.map(familyGuid => ( - - ))} - -) +const BaseLookupVariant = ({ variant, familiesByContactEmail, vlmDefaultContactEmails }) => { + const { internal, disabled, ...familiesByContact } = familiesByContactEmail + return ( + + + {(internal || []).map(familyGuid => ( + + ))} + {Object.entries(familiesByContact).map(([contactEmail, families]) => ( + + + {families.map(familyGuid => ( + + + + + + ))} + + + ))} + {(disabled || []).map(familyGuid => ( + + + + ))} + + ) +} -LookupVariant.propTypes = { +BaseLookupVariant.propTypes = { variant: PropTypes.object, + familiesByContactEmail: PropTypes.object, + vlmDefaultContactEmails: PropTypes.object, } +const mapStateToProps = (state, ownProps) => ({ + familiesByContactEmail: getVlmFamiliesByContactEmail(state, ownProps), + vlmDefaultContactEmails: getVlmDefaultContactEmails(state, ownProps), +}) + +const LookupVariant = connect(mapStateToProps)(BaseLookupVariant) + const VariantDisplay = ({ variants }) => ( (variants || [])[0]?.lookupFamilyGuids ? : ) diff --git a/ui/pages/SummaryData/selectors.js b/ui/pages/SummaryData/selectors.js index 04b2e09f85..a59bf34411 100644 --- a/ui/pages/SummaryData/selectors.js +++ b/ui/pages/SummaryData/selectors.js @@ -12,24 +12,28 @@ export const getMmeMetrics = state => state.mmeMetrics export const getMmeSubmissions = state => state.mmeSubmissions export const getExternalAnalysisUploadStats = state => state.externalAnalysisUploadStats -export const geVlmDefaultContactEmailByFamily = createSelector( +export const getVlmFamiliesByContactEmail = createSelector( getSortedIndividualsByFamily, + (state, ownProps) => ownProps.variant, + (individualsByFamily, variant) => (variant.lookupFamilyGuids || []).reduce((acc, familyGuid) => { + const individual = individualsByFamily[familyGuid]?.[0] + const contactEmail = individual?.projectGuid ? 'internal' : (individual?.vlmContactEmail || 'disabled') + return { ...acc, [contactEmail]: [...(acc[contactEmail] || []), familyGuid] } + }, {}), +) + +export const getVlmDefaultContactEmails = createSelector( + getVlmFamiliesByContactEmail, getGenesById, getUser, (state, ownProps) => ownProps.variant, - (individualsByFamily, genesById, user, variant) => { + (familiesByContactEmail, genesById, user, variant) => { const gene = genesById[getVariantMainGeneId(variant)]?.geneSymbol - const defaultEmail = { - subject: `${gene || variant.variantId} variant match in seqr`, - // - body: `Dear researcher,\n\nWe are interested in learning more about your case in seqr harboring ${getVariantSummary(variant)} in ${gene || 'no genes'} (${window.location.href}).\n\nWe appreciate your assistance and look forward to hearing more from you.\n\nBest wishes,\n${user.displayName}`, - } - return (variant.lookupFamilyGuids || []).reduce((acc, familyGuid) => { - const individual = individualsByFamily[familyGuid]?.[0] - if (!individual || individual.projectGuid) { - return acc - } - return { ...acc, [familyGuid]: { ...defaultEmail, to: individual.vlmContactEmail } } - }, {}) + const subject = `${gene || variant.variantId} variant match in seqr` + const defaultEmailContent = `harboring ${getVariantSummary(variant)} in ${gene || 'no genes'} (${window.location.href}).\n\nWe appreciate your assistance and look forward to hearing more from you.\n\nBest wishes,\n${user.displayName}` + return Object.entries(familiesByContactEmail).reduce((acc, [to, familyGuids]) => ({ + ...acc, + [to]: { to, subject, body: `Dear researcher,\n\nWe are interested in learning more about your ${familyGuids.length} cases in seqr ${defaultEmailContent}` }, + }), {}) }, )