Skip to content

Commit

Permalink
Merge pull request #4417 from broadinstitute/hail-search-sex-meatdata
Browse files (browse the repository at this point in the history)
Use boolean for sending sex metadata to hail search
  • Loading branch information
hanars authored Oct 15, 2024
2 parents 2f3473d + 5de0094 commit 2c9553b
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 19 deletions.
1 change: 0 additions & 1 deletion hail_search/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
AFFECTED_ID = 0
UNAFFECTED_ID = 1
UNKNOWN_AFFECTED_ID = 2
MALE = 'M'
AFFECTED_ID_MAP = {AFFECTED: AFFECTED_ID, UNAFFECTED: UNAFFECTED_ID, UNKNOWN_AFFECTED: UNKNOWN_AFFECTED_ID}

GROUPED_VARIANTS_FIELD = 'variants'
Expand Down
4 changes: 2 additions & 2 deletions hail_search/queries/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from hail_search.constants import AFFECTED_ID, ALT_ALT, ANNOTATION_OVERRIDE_FIELDS, ANY_AFFECTED, COMP_HET_ALT, \
COMPOUND_HET, GENOME_VERSION_GRCh38, GROUPED_VARIANTS_FIELD, ALLOWED_TRANSCRIPTS, ALLOWED_SECONDARY_TRANSCRIPTS, HAS_ANNOTATION_OVERRIDE, \
HAS_ALT, HAS_REF,INHERITANCE_FILTERS, PATH_FREQ_OVERRIDE_CUTOFF, MALE, RECESSIVE, REF_ALT, REF_REF, MAX_LOAD_INTERVALS, \
HAS_ALT, HAS_REF,INHERITANCE_FILTERS, PATH_FREQ_OVERRIDE_CUTOFF, RECESSIVE, REF_ALT, REF_REF, MAX_LOAD_INTERVALS, \
UNAFFECTED_ID, X_LINKED_RECESSIVE, XPOS, OMIM_SORT, FAMILY_GUID_FIELD, GENOTYPES_FIELD, AFFECTED_ID_MAP

HAIL_SEARCH_DATA_DIR = os.environ.get('HAIL_SEARCH_DATA_DIR', '/seqr/seqr-hail-search-data')
Expand Down Expand Up @@ -563,7 +563,7 @@ def _sample_entry_data(cls, sample):
sampleId=sample['sample_id'],
individualGuid=sample['individual_guid'],
affected_id=AFFECTED_ID_MAP.get(sample['affected']),
is_male='sex' in sample and sample['sex'] == MALE,
is_male=sample.get('is_male', False),
)

@classmethod
Expand Down
26 changes: 13 additions & 13 deletions hail_search/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,28 +6,28 @@
'project_guid': 'R0001_1kg', 'affected': 'A', 'sample_type': 'WES',
}
FAMILY_2_VARIANT_SAMPLE_DATA_WITH_SEX = {'SNV_INDEL': [
{'sample_id': 'HG00731', 'individual_guid': 'I000004_hg00731', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'A', 'sample_type': 'WES', 'sex': 'F'},
{'sample_id': 'HG00732', 'individual_guid': 'I000005_hg00732', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'N', 'sample_type': 'WES', 'sex': 'M'},
{'sample_id': 'HG00733', 'individual_guid': 'I000006_hg00733', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'N', 'sample_type': 'WES', 'sex': 'F'},
{'sample_id': 'HG00731', 'individual_guid': 'I000004_hg00731', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'A', 'sample_type': 'WES', 'is_male': False},
{'sample_id': 'HG00732', 'individual_guid': 'I000005_hg00732', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'N', 'sample_type': 'WES', 'is_male': True},
{'sample_id': 'HG00733', 'individual_guid': 'I000006_hg00733', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'N', 'sample_type': 'WES', 'is_male': False},
]}
FAMILY_2_VARIANT_SAMPLE_DATA = deepcopy(FAMILY_2_VARIANT_SAMPLE_DATA_WITH_SEX)
for s in FAMILY_2_VARIANT_SAMPLE_DATA['SNV_INDEL']:
s.pop('sex')
s.pop('is_male')

EXPECTED_SAMPLE_DATA_WITH_SEX = {
'SV_WES': [
{'sample_id': 'HG00731', 'individual_guid': 'I000004_hg00731', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'A', 'sample_type': 'WES', 'sex': 'F'},
{'sample_id': 'HG00732', 'individual_guid': 'I000005_hg00732', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'N', 'sample_type': 'WES', 'sex': 'M'},
{'sample_id': 'HG00733', 'individual_guid': 'I000006_hg00733', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'N', 'sample_type': 'WES', 'sex': 'F'}
{'sample_id': 'HG00731', 'individual_guid': 'I000004_hg00731', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'A', 'sample_type': 'WES', 'is_male': False},
{'sample_id': 'HG00732', 'individual_guid': 'I000005_hg00732', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'N', 'sample_type': 'WES', 'is_male': True},
{'sample_id': 'HG00733', 'individual_guid': 'I000006_hg00733', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'N', 'sample_type': 'WES', 'is_male': False}
],
}
EXPECTED_SAMPLE_DATA_WITH_SEX.update(FAMILY_2_VARIANT_SAMPLE_DATA_WITH_SEX)
EXPECTED_SAMPLE_DATA_WITH_SEX['SNV_INDEL'].append({'sex': 'M', **FAMILY_3_SAMPLE})
EXPECTED_SAMPLE_DATA_WITH_SEX['SNV_INDEL'].append({'is_male': True, **FAMILY_3_SAMPLE})

EXPECTED_SAMPLE_DATA = deepcopy(EXPECTED_SAMPLE_DATA_WITH_SEX)
for samples in EXPECTED_SAMPLE_DATA.values():
for s in samples:
s.pop('sex')
s.pop('is_male')

CUSTOM_AFFECTED_SAMPLE_DATA = {'SNV_INDEL': deepcopy(EXPECTED_SAMPLE_DATA['SNV_INDEL'])}
CUSTOM_AFFECTED_SAMPLE_DATA['SNV_INDEL'][0]['affected'] = 'N'
Expand Down Expand Up @@ -79,14 +79,14 @@
MULTI_PROJECT_MISSING_SAMPLE_DATA = deepcopy(FAMILY_2_MISSING_SAMPLE_DATA)
MULTI_PROJECT_MISSING_SAMPLE_DATA['SNV_INDEL'].append(FAMILY_11_SAMPLE_WES)

SV_WGS_SAMPLE_DATA_WITH_SEX = {'SV_WGS': [{'sex': 'M', **FAMILY_11_SAMPLE_WES, 'sample_type': 'WGS'}, {
'sample_id': 'NA20884', 'individual_guid': 'I000025_na20884', 'family_guid': 'F000011_11', 'project_guid': 'R0003_test', 'affected': 'N', 'sample_type': 'WGS', 'sex': 'M',
SV_WGS_SAMPLE_DATA_WITH_SEX = {'SV_WGS': [{'is_male': True, **FAMILY_11_SAMPLE_WES, 'sample_type': 'WGS'}, {
'sample_id': 'NA20884', 'individual_guid': 'I000025_na20884', 'family_guid': 'F000011_11', 'project_guid': 'R0003_test', 'affected': 'N', 'sample_type': 'WGS', 'is_male': True,
}, {
'sample_id': 'NA20883', 'individual_guid': 'I000035_na20883', 'family_guid': 'F000011_11', 'project_guid': 'R0003_test', 'affected': 'N', 'sample_type': 'WGS', 'sex': 'F',
'sample_id': 'NA20883', 'individual_guid': 'I000035_na20883', 'family_guid': 'F000011_11', 'project_guid': 'R0003_test', 'affected': 'N', 'sample_type': 'WGS', 'is_male': False,
}]}
SV_WGS_SAMPLE_DATA = deepcopy(SV_WGS_SAMPLE_DATA_WITH_SEX)
for s in SV_WGS_SAMPLE_DATA['SV_WGS']:
s.pop('sex')
s.pop('is_male')

SV_WES_SAMPLE_DATA = {'SV_WES': EXPECTED_SAMPLE_DATA['SV_WES'] + [FAMILY_3_SAMPLE]}

Expand Down
6 changes: 3 additions & 3 deletions seqr/utils/search/hail_search_utils.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
from collections import defaultdict

from django.db.models import F, Min, Count
from django.db.models import F, Min, Count, Case, When
from urllib3.connectionpool import connection_from_url

import requests
from reference_data.models import Omim, GeneConstraint, GENOME_VERSION_LOOKUP
from seqr.models import Sample, PhenotypePrioritization
from seqr.models import Sample, PhenotypePrioritization, Individual
from seqr.utils.search.constants import PRIORITIZED_GENE_SORT, X_LINKED_RECESSIVE
from seqr.utils.xpos_utils import MIN_POS, MAX_POS
from settings import HAIL_BACKEND_SERVICE_HOSTNAME, HAIL_BACKEND_SERVICE_PORT
Expand Down Expand Up @@ -141,7 +141,7 @@ def _get_sample_data(samples, inheritance_filter=None, inheritance_mode=None, **
affected=F('individual__affected'),
)
if inheritance_mode == X_LINKED_RECESSIVE:
sample_values['sex'] = F('individual__sex')
sample_values['is_male'] = Case(When(individual__sex=Individual.SEX_MALE, then=True), default=False)
sample_data = samples.order_by('guid').values('individual__individual_id', 'dataset_type', 'sample_type', **sample_values)

custom_affected = (inheritance_filter or {}).pop('affected', None)
Expand Down

0 comments on commit 2c9553b

Please sign in to comment.