Skip to content

Commit

Permalink
Merge pull request #4393 from broadinstitute/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
hanars authored Sep 26, 2024
2 parents 8d474f7 + 1391635 commit 1f0abb2
Show file tree
Hide file tree
Showing 8 changed files with 141 additions and 65 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/hail-search-unit-tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ jobs:
pip install -r hail_search/requirements-test.txt
- name: Run coverage tests
run: |
export DATASETS_DIR=./hail_search/fixtures
export HAIL_SEARCH_DATA_DIR=./hail_search/fixtures
export REFERENCE_DATASETS_DIR=./hail_search/fixtures
export MAX_GENE_INTERVALS=3
export MACHINE_MEM=24
export JAVA_OPTS_XSS=16M
Expand Down
22 changes: 13 additions & 9 deletions hail_search/queries/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
HAS_ALT, HAS_REF,INHERITANCE_FILTERS, PATH_FREQ_OVERRIDE_CUTOFF, MALE, RECESSIVE, REF_ALT, REF_REF, MAX_LOAD_INTERVALS, \
UNAFFECTED_ID, X_LINKED_RECESSIVE, XPOS, OMIM_SORT, FAMILY_GUID_FIELD, GENOTYPES_FIELD, AFFECTED_ID_MAP

DATASETS_DIR = os.environ.get('DATASETS_DIR', '/hail_datasets')
SSD_DATASETS_DIR = os.environ.get('SSD_DATASETS_DIR', DATASETS_DIR)
HAIL_SEARCH_DATA_DIR = os.environ.get('HAIL_SEARCH_DATA_DIR', '/seqr/seqr-hail-search-data')
IN_MEMORY_DIR = os.environ.get('IN_MEMORY_DIR', HAIL_SEARCH_DATA_DIR)

# Number of filtered genes at which pre-filtering a table by gene-intervals does not improve performance
# Estimated based on behavior for several representative gene lists
Expand Down Expand Up @@ -264,11 +264,15 @@ def _load_filtered_table(self, sample_data, intervals=None, annotations=None, an
*self._parse_sample_data(sample_data), parsed_intervals=parsed_intervals, raw_intervals=intervals, parsed_annotations=parsed_annotations, **kwargs)

@classmethod
def _get_table_path(cls, path, use_ssd_dir=False):
return f'{SSD_DATASETS_DIR if use_ssd_dir else DATASETS_DIR}/{cls.GENOME_VERSION}/{cls.DATA_TYPE}/{path}'
def _get_table_path(cls, path):
return f'{cls._get_table_dir(path)}/{cls.GENOME_VERSION}/{cls.DATA_TYPE}/{path}'

def _read_table(self, path, drop_globals=None, use_ssd_dir=False, skip_missing_field=None):
table_path = self._get_table_path(path, use_ssd_dir=use_ssd_dir)
@classmethod
def _get_table_dir(cls, path):
return IN_MEMORY_DIR if path == 'annotations.ht' else HAIL_SEARCH_DATA_DIR

def _read_table(self, path, drop_globals=None, skip_missing_field=None):
table_path = self._get_table_path(path)
if 'variant_ht' in self._load_table_kwargs:
ht = self._query_table_annotations(self._load_table_kwargs['variant_ht'], table_path)
if skip_missing_field and not ht.any(hl.is_defined(ht[skip_missing_field])):
Expand Down Expand Up @@ -303,7 +307,7 @@ def _load_filtered_project_hts(self, project_samples, skip_all_missing=False, n_
# for variant search, project_samples looks like
# {<project_guid>: {<sample_type>: {<family_guid>: [<sample_data>, <sample_data>, ...]}, <sample_type_2>: {<family_guid_2>: []} ...}, <project_guid_2>: ...}
sample_type = list(project_samples[project_guid].keys())[0]
project_ht = self._read_table(f'projects/{sample_type}/{project_guid}.ht', use_ssd_dir=True)
project_ht = self._read_table(f'projects/{sample_type}/{project_guid}.ht')
return self._filter_entries_table(project_ht, project_samples[project_guid][sample_type], **kwargs)

# Need to chunk tables or else evaluating table globals throws LineTooLong exception
Expand All @@ -316,7 +320,7 @@ def _load_filtered_project_hts(self, project_samples, skip_all_missing=False, n_
sample_data = {}
for project_guid, project_sample_data in project_samples.items():
sample_type = list(project_sample_data.keys())[0]
project_ht = self._read_table(f'projects/{sample_type}/{project_guid}.ht', use_ssd_dir=True)
project_ht = self._read_table(f'projects/{sample_type}/{project_guid}.ht')

if project_ht is None:
continue
Expand Down Expand Up @@ -344,7 +348,7 @@ def import_filtered_table(self, project_samples, num_families, **kwargs):
family_sample_data = list(project_samples.values())[0]
sample_type = list(family_sample_data.keys())[0]
family_guid = list(family_sample_data[sample_type].keys())[0]
family_ht = self._read_table(f'families/{sample_type}/{family_guid}.ht', use_ssd_dir=True)
family_ht = self._read_table(f'families/{sample_type}/{family_guid}.ht')
family_ht = family_ht.transmute(family_entries=[family_ht.entries])
family_ht = family_ht.annotate_globals(
family_guids=[family_guid], family_samples={family_guid: family_ht.sample_ids},
Expand Down
20 changes: 16 additions & 4 deletions hail_search/queries/mito.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@
from aiohttp.web import HTTPNotFound
import hail as hl
import logging
import os

from hail_search.constants import ABSENT_PATH_SORT_OFFSET, CLINVAR_KEY, CLINVAR_MITO_KEY, CLINVAR_LIKELY_PATH_FILTER, CLINVAR_PATH_FILTER, \
CLINVAR_PATH_RANGES, CLINVAR_PATH_SIGNIFICANCES, ALLOWED_TRANSCRIPTS, ALLOWED_SECONDARY_TRANSCRIPTS, PATHOGENICTY_SORT_KEY, CONSEQUENCE_SORT, \
PATHOGENICTY_HGMD_SORT_KEY, MAX_LOAD_INTERVALS
from hail_search.queries.base import BaseHailTableQuery, PredictionPath, QualityFilterFormat

REFERENCE_DATASETS_DIR = os.environ.get('REFERENCE_DATASETS_DIR', '/seqr/seqr-reference-data')

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -111,6 +113,10 @@ class MitoHailTableQuery(BaseHailTableQuery):
}
SORTS[PATHOGENICTY_HGMD_SORT_KEY] = SORTS[PATHOGENICTY_SORT_KEY]

PREFILTER_TABLES = {
CLINVAR_KEY: 'clinvar_path_variants.ht',
}

@staticmethod
def _selected_main_transcript_expr(ht):
comp_het_gene_ids = getattr(ht, 'comp_het_gene_ids', None)
Expand Down Expand Up @@ -159,22 +165,28 @@ def _get_family_passes_quality_filter(self, quality_filter, ht, pathogenicity=No

return lambda entries: hl.is_defined(clinvar_path_ht[ht.key]) | passes_quality(entries)

def _get_loaded_filter_ht(self, key, table_path, get_filters, **kwargs):
def _get_loaded_filter_ht(self, key, get_filters, **kwargs):
if self._filter_hts.get(key) is None:
ht_filter = get_filters(**kwargs)
if ht_filter is False:
self._filter_hts[key] = False
else:
ht = self._read_table(table_path)
ht = self._read_table(self.PREFILTER_TABLES[key])
if ht_filter is not True:
ht = ht.filter(ht_filter(ht))
self._filter_hts[key] = ht

return self._filter_hts[key]

@classmethod
def _get_table_dir(cls, path):
if path in cls.PREFILTER_TABLES.values():
return REFERENCE_DATASETS_DIR
return super()._get_table_dir(path)

def _get_loaded_clinvar_prefilter_ht(self, pathogenicity):
return self._get_loaded_filter_ht(
CLINVAR_KEY, 'clinvar_path_variants.ht', self._get_clinvar_prefilter, pathogenicity=pathogenicity)
CLINVAR_KEY, self._get_clinvar_prefilter, pathogenicity=pathogenicity)

def _get_clinvar_prefilter(self, pathogenicity=None):
clinvar_path_filters = self._get_clinvar_path_filters(pathogenicity)
Expand Down Expand Up @@ -315,7 +327,7 @@ def _gene_rank_sort(cls, r, gene_ranks):
def _add_project_lookup_data(self, ht, annotation_fields, *args, **kwargs):
# Get all the project-families for the looked up variant formatted as a dict of dicts:
# {<project_guid>: {<sample_type>: {<family_guid>: True}, <sample_type_2>: {<family_guid_2>: True}}, <project_guid_2>: ...}
lookup_ht = self._read_table('lookup.ht', use_ssd_dir=True, skip_missing_field='project_stats')
lookup_ht = self._read_table('lookup.ht', skip_missing_field='project_stats')
if lookup_ht is None:
raise HTTPNotFound()
variant_projects = lookup_ht.aggregate(hl.agg.take(
Expand Down
7 changes: 6 additions & 1 deletion hail_search/queries/snv_indel_37.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,13 +73,18 @@ class SnvIndelHailTableQuery37(MitoHailTableQuery):
('is_gt_10_percent', 0.1),
])

PREFILTER_TABLES = {
**MitoHailTableQuery.PREFILTER_TABLES,
GNOMAD_GENOMES_FIELD: 'high_af_variants.ht',
}

def _prefilter_entries_table(self, ht, *args, raw_intervals=None, **kwargs):
ht = super()._prefilter_entries_table(ht, *args, **kwargs)
load_table_intervals = self._load_table_kwargs.get('_intervals') or []
no_interval_prefilter = not load_table_intervals or len(raw_intervals or []) > len(load_table_intervals)
if 'variant_ht' not in self._load_table_kwargs and no_interval_prefilter:
af_ht = self._get_loaded_filter_ht(
GNOMAD_GENOMES_FIELD, 'high_af_variants.ht', self._get_gnomad_af_prefilter, **kwargs)
GNOMAD_GENOMES_FIELD, self._get_gnomad_af_prefilter, **kwargs)
if af_ht:
ht = ht.filter(hl.is_missing(af_ht[ht.key]))
return ht
Expand Down
12 changes: 6 additions & 6 deletions ui/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions ui/pages/Project/components/FamilyTable/IndividualRow.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -462,6 +462,14 @@ const NON_CASE_REVIEW_FIELDS = [
{ value: 'CF', text: 'UBERON:0001359 (cerebrospinal fluid)' },
{ value: 'U', text: 'UBERON:0001088 (urine)' },
{ value: 'NE', text: 'UBERON:0019306 (nose epithelium)' },
{ value: 'EM', text: 'UBERON:0005291 (embryonic tissue)' },
{ value: 'CE', text: 'UBERON:0002037 (cerebellum tissue)' },
{ value: 'CA', text: 'UBERON:0001133 (cardiac tissue)' },
{ value: 'IP', text: 'CL:0000034 (iPSC)' },
{ value: 'NP', text: 'CL:0011020 (iPSC NPC)' },
{ value: 'MO', text: 'CL:0000576 (monocytes - PBMCs)' },
{ value: 'LY', text: 'CL:0000542 (lymphocytes - LCLs)' },
{ value: 'FI', text: 'CL:0000057 (fibroblasts)' },
],
},
{
Expand Down
102 changes: 72 additions & 30 deletions ui/pages/SummaryData/components/VariantLookup.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ import Variants, { Variant, StyledVariantRow } from 'shared/components/panel/var
import { FamilyVariantIndividuals } from 'shared/components/panel/variants/VariantIndividuals'
import { GENOME_VERSION_FIELD } from 'shared/utils/constants'
import { sendVlmContactEmail } from '../reducers'
import { geVlmDefaultContactEmailByFamily } from '../selectors'
import { getVlmDefaultContactEmails, getVlmFamiliesByContactEmail } from '../selectors'

const FIELDS = [
{
Expand All @@ -37,57 +37,99 @@ const FIELDS = [
{ required: true, ...GENOME_VERSION_FIELD },
]

const mapContactStateToProps = (state, ownProps) => {
const defaultEmail = geVlmDefaultContactEmailByFamily(state, ownProps)[ownProps.familyGuid]
const disabled = !defaultEmail?.to
return {
defaultEmail,
disabled,
buttonText: disabled ? 'Contact Opted Out' : null,
modalId: ownProps.familyGuid,
}
}

const mapContactDispatchToProps = {
onSubmit: sendVlmContactEmail,
}

const ContactButton = connect(mapContactStateToProps, mapContactDispatchToProps)(SendEmailButton)
const ContactButton = connect(null, mapContactDispatchToProps)(SendEmailButton)

const LookupFamily = ({ familyGuid, variant, reads, showReads }) => (
const LookupFamilyLayout = ({ topContent, bottomContent, children, ...buttonProps }) => (
<StyledVariantRow>
<Grid.Column width={16}>
<FamilyVariantTags familyGuid={familyGuid} variant={variant} linkToSavedVariants />
{topContent}
<Grid.Column width={4}>
<ContactButton {...buttonProps} />
</Grid.Column>
<Grid.Column width={4}><ContactButton familyGuid={familyGuid} variant={variant} /></Grid.Column>
<Grid.Column width={12}>
<FamilyVariantIndividuals familyGuid={familyGuid} variant={variant} />
{showReads}
{children}
</Grid.Column>
<Grid.Column width={16}>{reads}</Grid.Column>
{bottomContent}
</StyledVariantRow>
)

LookupFamily.propTypes = {
LookupFamilyLayout.propTypes = {
topContent: PropTypes.node,
bottomContent: PropTypes.node,
children: PropTypes.node,
}

const InternalFamily = ({ familyGuid, variant, reads, showReads }) => (
<LookupFamilyLayout
topContent={(
<Grid.Column width={16}>
<FamilyVariantTags familyGuid={familyGuid} variant={variant} linkToSavedVariants />
</Grid.Column>
)}
bottomContent={<Grid.Column width={16}>{reads}</Grid.Column>}
>
<FamilyVariantIndividuals familyGuid={familyGuid} variant={variant} />
{showReads}
</LookupFamilyLayout>
)

InternalFamily.propTypes = {
familyGuid: PropTypes.string.isRequired,
variant: PropTypes.object.isRequired,
reads: PropTypes.object,
showReads: PropTypes.object,
}

const LookupVariant = ({ variant }) => (
<Grid stackable divided="vertically">
<Variant variant={variant} />
{variant.lookupFamilyGuids.map(familyGuid => (
<FamilyReads key={familyGuid} layout={LookupFamily} familyGuid={familyGuid} variant={variant} />
))}
</Grid>
)
const BaseLookupVariant = ({ variant, familiesByContactEmail, vlmDefaultContactEmails }) => {
const { internal, disabled, ...familiesByContact } = familiesByContactEmail
return (
<Grid stackable divided="vertically">
<Variant variant={variant} />
{(internal || []).map(familyGuid => (
<FamilyReads key={familyGuid} layout={InternalFamily} familyGuid={familyGuid} variant={variant} />
))}
{Object.entries(familiesByContact).map(([contactEmail, families]) => (
<LookupFamilyLayout
key={contactEmail}
defaultEmail={vlmDefaultContactEmails[contactEmail]}
modalId={contactEmail}
>
<Grid stackable divided="vertically">
{families.map(familyGuid => (
<Grid.Row key={familyGuid}>
<Grid.Column width={16}>
<FamilyVariantIndividuals familyGuid={familyGuid} variant={variant} />
</Grid.Column>
</Grid.Row>
))}
</Grid>
</LookupFamilyLayout>
))}
{(disabled || []).map(familyGuid => (
<LookupFamilyLayout key={familyGuid} defaultEmail={vlmDefaultContactEmails.disabled} disabled buttonText="Contact Opted Out">
<FamilyVariantIndividuals familyGuid={familyGuid} variant={variant} />
</LookupFamilyLayout>
))}
</Grid>
)
}

LookupVariant.propTypes = {
BaseLookupVariant.propTypes = {
variant: PropTypes.object,
familiesByContactEmail: PropTypes.object,
vlmDefaultContactEmails: PropTypes.object,
}

const mapStateToProps = (state, ownProps) => ({
familiesByContactEmail: getVlmFamiliesByContactEmail(state, ownProps),
vlmDefaultContactEmails: getVlmDefaultContactEmails(state, ownProps),
})

const LookupVariant = connect(mapStateToProps)(BaseLookupVariant)

const VariantDisplay = ({ variants }) => (
(variants || [])[0]?.lookupFamilyGuids ? <LookupVariant variant={variants[0]} /> : <Variants variants={variants} />
)
Expand Down
32 changes: 18 additions & 14 deletions ui/pages/SummaryData/selectors.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,24 +12,28 @@ export const getMmeMetrics = state => state.mmeMetrics
export const getMmeSubmissions = state => state.mmeSubmissions
export const getExternalAnalysisUploadStats = state => state.externalAnalysisUploadStats

export const geVlmDefaultContactEmailByFamily = createSelector(
export const getVlmFamiliesByContactEmail = createSelector(
getSortedIndividualsByFamily,
(state, ownProps) => ownProps.variant,
(individualsByFamily, variant) => (variant.lookupFamilyGuids || []).reduce((acc, familyGuid) => {
const individual = individualsByFamily[familyGuid]?.[0]
const contactEmail = individual?.projectGuid ? 'internal' : (individual?.vlmContactEmail || 'disabled')
return { ...acc, [contactEmail]: [...(acc[contactEmail] || []), familyGuid] }
}, {}),
)

export const getVlmDefaultContactEmails = createSelector(
getVlmFamiliesByContactEmail,
getGenesById,
getUser,
(state, ownProps) => ownProps.variant,
(individualsByFamily, genesById, user, variant) => {
(familiesByContactEmail, genesById, user, variant) => {
const gene = genesById[getVariantMainGeneId(variant)]?.geneSymbol
const defaultEmail = {
subject: `${gene || variant.variantId} variant match in seqr`,
//
body: `Dear researcher,\n\nWe are interested in learning more about your case in seqr harboring ${getVariantSummary(variant)} in ${gene || 'no genes'} (${window.location.href}).\n\nWe appreciate your assistance and look forward to hearing more from you.\n\nBest wishes,\n${user.displayName}`,
}
return (variant.lookupFamilyGuids || []).reduce((acc, familyGuid) => {
const individual = individualsByFamily[familyGuid]?.[0]
if (!individual || individual.projectGuid) {
return acc
}
return { ...acc, [familyGuid]: { ...defaultEmail, to: individual.vlmContactEmail } }
}, {})
const subject = `${gene || variant.variantId} variant match in seqr`
const defaultEmailContent = `harboring ${getVariantSummary(variant)} in ${gene || 'no genes'} (${window.location.href}).\n\nWe appreciate your assistance and look forward to hearing more from you.\n\nBest wishes,\n${user.displayName}`
return Object.entries(familiesByContactEmail).reduce((acc, [to, familyGuids]) => ({
...acc,
[to]: { to, subject, body: `Dear researcher,\n\nWe are interested in learning more about your ${familyGuids.length} cases in seqr ${defaultEmailContent}` },
}), {})
},
)

0 comments on commit 1f0abb2

Please sign in to comment.