From babadd77cbce98b3876e1ae65af330c9dc51ed12 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Fri, 18 Oct 2024 15:52:21 -0400 Subject: [PATCH 01/18] almost working --- hail_search/definitions.py | 7 +++ hail_search/queries/base.py | 11 ++-- hail_search/queries/mito.py | 118 +++++++++++++++++++++++++++++------- 3 files changed, 110 insertions(+), 26 deletions(-) diff --git a/hail_search/definitions.py b/hail_search/definitions.py index bc6aa5b204..eac11d7f0e 100644 --- a/hail_search/definitions.py +++ b/hail_search/definitions.py @@ -12,6 +12,13 @@ def family_entries_field(self) -> str: SampleType.WGS: 'wgs_family_entries', }[self] + @property + def failed_family_sample_field(self) -> str: + return { + SampleType.WES: f'wes_failed_family_sample_indices', + SampleType.WGS: f'wgs_failed_family_sample_indices', + }[self] + @property def passes_inheritance_field(self) -> str: return { diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index d30a9b8fdc..a85d5ba011 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -472,7 +472,7 @@ def _apply_entry_filters(ht): def _filter_single_entries_table(self, ht, project_families, inheritance_filter=None, quality_filter=None, is_merged_ht=False, **kwargs): ht, sorted_family_sample_data = self._add_entry_sample_families(ht, project_families, is_merged_ht) ht = self._filter_quality(ht, quality_filter, **kwargs) - ht, ch_ht = self._filter_inheritance( + ht, ch_ht, _, _ = self._filter_inheritance( ht, None, inheritance_filter, sorted_family_sample_data, ) ht = self._apply_entry_filters(ht) @@ -588,8 +588,9 @@ def _filter_inheritance( lambda entries: hl.or_missing(entries.any(any_valid_entry), entries) )}) + ch_ht_entry_indices_by_gt = None if self._has_comp_het_search: - comp_het_ht = self._annotate_families_inheritance( + comp_het_ht, ch_ht_entry_indices_by_gt = self._annotate_families_inheritance( comp_het_ht if comp_het_ht is not None else ht, COMPOUND_HET, inheritance_filter, sorted_family_sample_data, annotation, entries_ht_field ) @@ -598,12 +599,12 @@ def _filter_inheritance( # No sample-specific inheritance filtering needed sorted_family_sample_data = [] - ht = None if self._inheritance_mode == COMPOUND_HET else self._annotate_families_inheritance( + ht, ht_entry_indices_by_gt = (None, None) if self._inheritance_mode == COMPOUND_HET else self._annotate_families_inheritance( ht, self._inheritance_mode, inheritance_filter, sorted_family_sample_data, annotation, entries_ht_field ) - return ht, comp_het_ht + return ht, comp_het_ht, ht_entry_indices_by_gt, ch_ht_entry_indices_by_gt def _annotate_families_inheritance( self, ht, inheritance_mode, inheritance_filter, sorted_family_sample_data, @@ -644,7 +645,7 @@ def _annotate_families_inheritance( ) }) - return ht + return ht, entry_indices_by_gt def _get_family_passes_quality_filter(self, quality_filter, ht, **kwargs): quality_filter = quality_filter or {} diff --git a/hail_search/queries/mito.py b/hail_search/queries/mito.py index 6b46ada427..6552af4171 100644 --- a/hail_search/queries/mito.py +++ b/hail_search/queries/mito.py @@ -206,37 +206,70 @@ def _filter_entries_ht_both_sample_types( ch_ht = None family_guid_idx_map = defaultdict(dict) + family_sample_idx_map = defaultdict(lambda: defaultdict(dict)) for sample_type, sorted_family_sample_data in sample_types: - ht, ch_ht = self._filter_inheritance( + ht, ch_ht, ht_idx_by_gt_map, ch_idx_by_gt_map = self._filter_inheritance( ht, ch_ht, inheritance_filter, sorted_family_sample_data, annotation=sample_type.passes_inheritance_field, entries_ht_field=sample_type.family_entries_field ) + ht = self._annotate_failed_family_samples_inheritance( + ht, ht_idx_by_gt_map, + annotation=sample_type.failed_family_sample_field, entries_ht_field=sample_type.family_entries_field + ) + ch_ht = self._annotate_failed_family_samples_inheritance( + ch_ht, ch_idx_by_gt_map, + annotation=sample_type.failed_family_sample_field, entries_ht_field=sample_type.family_entries_field + ) + for family_idx, samples in enumerate(sorted_family_sample_data): family_guid = samples[0]['familyGuid'] family_guid_idx_map[family_guid][sample_type.value] = family_idx + for sample_idx, sample in enumerate(samples): + family_sample_idx_map[family_guid][sample['sampleId']][sample_type.value] = sample_idx - family_idx_map = hl.dict(family_guid_idx_map) - ht = self._apply_multi_sample_type_entry_filters(ht, family_idx_map) - ch_ht = self._apply_multi_sample_type_entry_filters(ch_ht, family_idx_map) + family_guid_idx_map = hl.dict(family_guid_idx_map) + family_sample_idx_map = hl.dict(family_sample_idx_map) + ht = self._apply_multi_sample_type_entry_filters(ht, family_guid_idx_map, family_sample_idx_map) + ch_ht = self._apply_multi_sample_type_entry_filters(ch_ht, family_guid_idx_map, family_sample_idx_map) return ht, ch_ht - def _apply_multi_sample_type_entry_filters(self, ht, family_idx_map): + def _annotate_failed_family_samples_inheritance( + self, ht, entry_indices_by_gt, annotation, entries_ht_field, + ): + if ht is None: + return ht + + ht = ht.annotate(**{annotation: hl.empty_dict(hl.tint32, hl.tarray(hl.tint32))}) + # print(annotation, ht[annotation].collect()) + + for genotype, entry_indices in entry_indices_by_gt.items(): + if not entry_indices: + continue + # print(genotype, entry_indices) + entry_indices = hl.dict(entry_indices) + ht = ht.annotate( + **{annotation: hl.dict( + hl.enumerate(ht[entries_ht_field]).starmap( + lambda family_index, entries: hl.bind( + lambda failed_samples: hl.tuple(( + family_index, + ht[annotation].get(family_index, hl.empty_array(hl.tint32)).extend(failed_samples) + )), + entry_indices.get(family_index).filter(lambda sample_i: ~self.GENOTYPE_QUERY_MAP[genotype](entries[sample_i].GT)) + ) + ) + )}) + # print(annotation, ht[annotation].collect()) + return ht + + def _apply_multi_sample_type_entry_filters(self, ht, family_idx_map, sample_idx_map): if ht is None: return ht # Keep family from both sample types if either passes quality AND inheritance for sample_type in SampleType: - ht = ht.annotate(**{ - sample_type.family_entries_field: hl.enumerate(ht[sample_type.family_entries_field]).starmap( - lambda i, family_samples: hl.or_missing( - hl.bind( - lambda other_sample_type_idx: ( - self._family_has_valid_sample_type_entries(ht, sample_type, i) | - self._family_has_valid_sample_type_entries(ht, sample_type.other_sample_type, other_sample_type_idx) - ), - family_idx_map.get(hl.coalesce(family_samples)[0]['familyGuid']).get(sample_type.other_sample_type.value), - ), family_samples) - )}) + ht = self._apply_quality_entry_filters(ht, sample_type, family_idx_map) + ht = self._apply_inheritance_entry_filters(ht, sample_type, family_idx_map, sample_idx_map) # Merge family entries and filters from both sample types ht = ht.transmute( @@ -252,14 +285,57 @@ def _apply_multi_sample_type_entry_filters(self, ht, family_idx_map): # Filter out families with no valid entries in either sample type return ht.filter(ht.family_entries.any(hl.is_defined)) + def _apply_quality_entry_filters(self, ht, sample_type, family_idx_map): + return ht.annotate(**{ + sample_type.family_entries_field: hl.enumerate(ht[sample_type.family_entries_field]).starmap( + lambda i, family_samples: hl.or_missing( + hl.bind(lambda other_sample_type_idx: ( + self._family_has_valid_quality(ht, sample_type, i) | + self._family_has_valid_quality(ht, sample_type.other_sample_type, other_sample_type_idx) + ), family_idx_map.get(hl.coalesce(family_samples)[0]['familyGuid']).get(sample_type.other_sample_type.value), + ), family_samples) + )}) + @staticmethod - def _family_has_valid_sample_type_entries(ht, sample_type, sample_type_family_idx): - # Note: This logic does not sufficiently handle case 2 here https://docs.google.com/presentation/d/1hqDV8ulhviUcR5C4PtNUqkCLXKDsc6pccgFVlFmWUAU/edit?usp=sharing - # and will need to be changed to support it - https://github.com/broadinstitute/seqr/issues/4403 + def _family_has_valid_quality(ht, sample_type, sample_type_family_idx): return ( hl.is_defined(sample_type_family_idx) & - hl.is_defined(ht[sample_type.passes_quality_field][sample_type_family_idx]) & - hl.is_defined(ht[sample_type.passes_inheritance_field][sample_type_family_idx]) + hl.is_defined(ht[sample_type.passes_quality_field][sample_type_family_idx]) + ) + + def _apply_inheritance_entry_filters(self, ht, sample_type, family_idx_map, sample_idx_map): + return ht.annotate(**{ + sample_type.family_entries_field: hl.if_else( + hl.is_missing(ht[sample_type.family_entries_field]), # If family entries has already been filtered due to quality do nothing + ht[sample_type.family_entries_field], + hl.enumerate(ht[sample_type.family_entries_field]).starmap( # Else, + lambda family_i, family_samples: hl.or_missing( + hl.all(hl.enumerate(family_samples).starmap( + lambda sample_i, sample: hl.any( # For each sample in a family, + hl.bind(lambda other_sample_type_indices: ( # Get the sample and family index of the sample in the other sample type family_entries + hl.if_else( + hl.is_defined(sample_i) & hl.is_defined(other_sample_type_indices[1]), # If samples are present for both sample types, + ( # Keep the family entries if family passes inheritance in either sample type. + hl.is_defined(ht[sample_type.passes_inheritance_field][family_i]) | + hl.is_defined(ht[sample_type.other_sample_type.passes_inheritance_field][other_sample_type_indices[0]]) + ), # Else, if sample is in only one sample type, check if that sample did not fail inheritance + self._family_sample_has_valid_inheritance(ht, sample_type, family_i, sample_i) | + self._family_sample_has_valid_inheritance(ht, sample_type.other_sample_type, other_sample_type_indices[0], other_sample_type_indices[1]) + ) + ),( + family_idx_map.get(hl.coalesce(sample)['familyGuid']).get(sample_type.other_sample_type.value), + sample_idx_map.get(hl.coalesce(sample)['familyGuid']).get(hl.coalesce(sample)['sampleId']).get(sample_type.other_sample_type.value)), + )) + )), family_samples) + )) + }) + + @staticmethod + def _family_sample_has_valid_inheritance(ht, sample_type, family_idx, sample_idx): + return ( + hl.is_defined(family_idx) & + hl.is_defined(sample_idx) & + ~hl.is_defined(ht[sample_type.failed_family_sample_field][family_idx].contains(sample_idx)) ) def _get_sample_genotype(self, samples, r=None, include_genotype_overrides=False, select_fields=None, **kwargs): From 7115f813dc1cc72b8f2809f39514ffa8ea89cf11 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Fri, 18 Oct 2024 17:18:28 -0400 Subject: [PATCH 02/18] updates --- hail_search/queries/mito.py | 77 +++++++++++++++++++++---------------- 1 file changed, 44 insertions(+), 33 deletions(-) diff --git a/hail_search/queries/mito.py b/hail_search/queries/mito.py index 6552af4171..b25affacea 100644 --- a/hail_search/queries/mito.py +++ b/hail_search/queries/mito.py @@ -250,18 +250,30 @@ def _annotate_failed_family_samples_inheritance( ht = ht.annotate( **{annotation: hl.dict( hl.enumerate(ht[entries_ht_field]).starmap( - lambda family_index, entries: hl.bind( + lambda family_idx, entries: hl.bind( lambda failed_samples: hl.tuple(( - family_index, - ht[annotation].get(family_index, hl.empty_array(hl.tint32)).extend(failed_samples) + family_idx, + ht[annotation].get(family_idx, hl.empty_array(hl.tint32)).extend(failed_samples) )), - entry_indices.get(family_index).filter(lambda sample_i: ~self.GENOTYPE_QUERY_MAP[genotype](entries[sample_i].GT)) + entry_indices.get(family_idx).filter(lambda sample_i: ~self.GENOTYPE_QUERY_MAP[genotype](entries[sample_i].GT)) ) ) )}) # print(annotation, ht[annotation].collect()) return ht + # ht = ht.annotate( + # **{annotation: ht[annotation].map_values( + # lambda existing_failed_samples: existing_failed_samples.extend( + # hl.enumerate(ht[entries_ht_field]).starmap( + # lambda family_index, entries: entry_indices.get(family_index).filter( + # lambda sample_i: ~self.GENOTYPE_QUERY_MAP[genotype](entries[sample_i].GT) + # ) + # ).flatmap(lambda x: x) + # ) + # ) + # }) + def _apply_multi_sample_type_entry_filters(self, ht, family_idx_map, sample_idx_map): if ht is None: return ht @@ -269,6 +281,8 @@ def _apply_multi_sample_type_entry_filters(self, ht, family_idx_map, sample_idx_ # Keep family from both sample types if either passes quality AND inheritance for sample_type in SampleType: ht = self._apply_quality_entry_filters(ht, sample_type, family_idx_map) + # TODO - Since each sample type is processed separately, wgs with 1 sample will not be filtered out if it passes in wes (even though another sample failed in wes) + # and the coalesce below keeps that sample even though the family was filtered out in wes. This is a limitation of the current implementation. ht = self._apply_inheritance_entry_filters(ht, sample_type, family_idx_map, sample_idx_map) # Merge family entries and filters from both sample types @@ -304,38 +318,35 @@ def _family_has_valid_quality(ht, sample_type, sample_type_family_idx): ) def _apply_inheritance_entry_filters(self, ht, sample_type, family_idx_map, sample_idx_map): - return ht.annotate(**{ - sample_type.family_entries_field: hl.if_else( - hl.is_missing(ht[sample_type.family_entries_field]), # If family entries has already been filtered due to quality do nothing - ht[sample_type.family_entries_field], - hl.enumerate(ht[sample_type.family_entries_field]).starmap( # Else, - lambda family_i, family_samples: hl.or_missing( - hl.all(hl.enumerate(family_samples).starmap( - lambda sample_i, sample: hl.any( # For each sample in a family, - hl.bind(lambda other_sample_type_indices: ( # Get the sample and family index of the sample in the other sample type family_entries - hl.if_else( - hl.is_defined(sample_i) & hl.is_defined(other_sample_type_indices[1]), # If samples are present for both sample types, - ( # Keep the family entries if family passes inheritance in either sample type. - hl.is_defined(ht[sample_type.passes_inheritance_field][family_i]) | - hl.is_defined(ht[sample_type.other_sample_type.passes_inheritance_field][other_sample_type_indices[0]]) - ), # Else, if sample is in only one sample type, check if that sample did not fail inheritance - self._family_sample_has_valid_inheritance(ht, sample_type, family_i, sample_i) | - self._family_sample_has_valid_inheritance(ht, sample_type.other_sample_type, other_sample_type_indices[0], other_sample_type_indices[1]) - ) - ),( - family_idx_map.get(hl.coalesce(sample)['familyGuid']).get(sample_type.other_sample_type.value), - sample_idx_map.get(hl.coalesce(sample)['familyGuid']).get(hl.coalesce(sample)['sampleId']).get(sample_type.other_sample_type.value)), - )) - )), family_samples) - )) - }) + ht = ht.annotate( + **{sample_type.family_entries_field: hl.enumerate(ht[sample_type.family_entries_field]).starmap( + lambda family_idx, family_samples: hl.or_missing( # Keep a family if + hl.all(hl.enumerate(family_samples).starmap( # For each sample in the family, + lambda sample_idx, sample: hl.bind(lambda other_sample_type_indices: ( # Get the sample and family index of the sample in the other sample type family_entries + hl.if_else( + hl.is_defined(sample_idx) & hl.is_defined(other_sample_type_indices[1]), # If samples are present for both sample types, + ( # Keep the family entries if family passes inheritance in either sample type. + hl.is_defined(ht[sample_type.passes_inheritance_field][family_idx]) | + hl.is_defined(ht[sample_type.other_sample_type.passes_inheritance_field][other_sample_type_indices[0]]) + ), # Else, if sample is in only one sample type, check if that sample did not fail inheritance in either sample type + self._family_sample_has_valid_inheritance(ht, sample_type, family_idx, sample_idx) & + self._family_sample_has_valid_inheritance(ht, sample_type.other_sample_type, other_sample_type_indices[0], other_sample_type_indices[1]) + ) + ),( + family_idx_map.get(hl.coalesce(sample)['familyGuid']).get(sample_type.other_sample_type.value), + sample_idx_map.get(hl.coalesce(sample)['familyGuid']).get(hl.coalesce(sample)['sampleId']).get(sample_type.other_sample_type.value)), + ) + )), family_samples) + ) + }) + return ht @staticmethod def _family_sample_has_valid_inheritance(ht, sample_type, family_idx, sample_idx): - return ( - hl.is_defined(family_idx) & - hl.is_defined(sample_idx) & - ~hl.is_defined(ht[sample_type.failed_family_sample_field][family_idx].contains(sample_idx)) + return hl.if_else( + hl.is_defined(family_idx) & hl.is_defined(sample_idx), + ~ht[sample_type.failed_family_sample_field][family_idx].contains(sample_idx), + True ) def _get_sample_genotype(self, samples, r=None, include_genotype_overrides=False, select_fields=None, **kwargs): From fb1dc194d0b1cfb932b20516220f194a99c6e842 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Wed, 23 Oct 2024 11:16:49 -0400 Subject: [PATCH 03/18] or_missing --- hail_search/queries/mito.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/hail_search/queries/mito.py b/hail_search/queries/mito.py index b25affacea..66637c4f6a 100644 --- a/hail_search/queries/mito.py +++ b/hail_search/queries/mito.py @@ -329,7 +329,7 @@ def _apply_inheritance_entry_filters(self, ht, sample_type, family_idx_map, samp hl.is_defined(ht[sample_type.passes_inheritance_field][family_idx]) | hl.is_defined(ht[sample_type.other_sample_type.passes_inheritance_field][other_sample_type_indices[0]]) ), # Else, if sample is in only one sample type, check if that sample did not fail inheritance in either sample type - self._family_sample_has_valid_inheritance(ht, sample_type, family_idx, sample_idx) & + self._family_sample_has_valid_inheritance(ht, sample_type, family_idx, sample_idx) | self._family_sample_has_valid_inheritance(ht, sample_type.other_sample_type, other_sample_type_indices[0], other_sample_type_indices[1]) ) ),( @@ -343,10 +343,9 @@ def _apply_inheritance_entry_filters(self, ht, sample_type, family_idx_map, samp @staticmethod def _family_sample_has_valid_inheritance(ht, sample_type, family_idx, sample_idx): - return hl.if_else( + return hl.or_missing( hl.is_defined(family_idx) & hl.is_defined(sample_idx), ~ht[sample_type.failed_family_sample_field][family_idx].contains(sample_idx), - True ) def _get_sample_genotype(self, samples, r=None, include_genotype_overrides=False, select_fields=None, **kwargs): From de664cd2c02b6fa8087dc476dcd3cd8302f35e95 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Wed, 23 Oct 2024 13:48:41 -0400 Subject: [PATCH 04/18] fix sample tracking logic --- hail_search/queries/mito.py | 39 ++++++++++++------------------------- hail_search/test_search.py | 5 +++-- 2 files changed, 15 insertions(+), 29 deletions(-) diff --git a/hail_search/queries/mito.py b/hail_search/queries/mito.py index 66637c4f6a..439e9dfce7 100644 --- a/hail_search/queries/mito.py +++ b/hail_search/queries/mito.py @@ -239,40 +239,25 @@ def _annotate_failed_family_samples_inheritance( if ht is None: return ht - ht = ht.annotate(**{annotation: hl.empty_dict(hl.tint32, hl.tarray(hl.tint32))}) - # print(annotation, ht[annotation].collect()) + # Initialize empty array + ht = ht.annotate(**{annotation: ht[entries_ht_field].map(lambda x: hl.empty_array(hl.tint32))}) + # Add failed genotype samples for genotype, entry_indices in entry_indices_by_gt.items(): if not entry_indices: continue - # print(genotype, entry_indices) + entry_indices = hl.dict(entry_indices) - ht = ht.annotate( - **{annotation: hl.dict( - hl.enumerate(ht[entries_ht_field]).starmap( - lambda family_idx, entries: hl.bind( - lambda failed_samples: hl.tuple(( - family_idx, - ht[annotation].get(family_idx, hl.empty_array(hl.tint32)).extend(failed_samples) - )), - entry_indices.get(family_idx).filter(lambda sample_i: ~self.GENOTYPE_QUERY_MAP[genotype](entries[sample_i].GT)) - ) + ht = ht.annotate(**{annotation: hl.enumerate(ht[entries_ht_field]).starmap( + lambda family_idx, entries: hl.bind( + lambda failed_samples: ht[annotation][family_idx].extend(failed_samples), + entry_indices.get(family_idx).filter( + lambda sample_i: ~self.GENOTYPE_QUERY_MAP[genotype](entries[sample_i].GT) ) - )}) - # print(annotation, ht[annotation].collect()) - return ht + ) + )}) - # ht = ht.annotate( - # **{annotation: ht[annotation].map_values( - # lambda existing_failed_samples: existing_failed_samples.extend( - # hl.enumerate(ht[entries_ht_field]).starmap( - # lambda family_index, entries: entry_indices.get(family_index).filter( - # lambda sample_i: ~self.GENOTYPE_QUERY_MAP[genotype](entries[sample_i].GT) - # ) - # ).flatmap(lambda x: x) - # ) - # ) - # }) + return ht def _apply_multi_sample_type_entry_filters(self, ht, family_idx_map, sample_idx_map): if ht is None: diff --git a/hail_search/test_search.py b/hail_search/test_search.py index 39743b29e6..cbc6be1c88 100644 --- a/hail_search/test_search.py +++ b/hail_search/test_search.py @@ -388,10 +388,11 @@ async def test_both_sample_types_search(self): [VARIANT2_BOTH_SAMPLE_TYPES], sample_data=FAMILY_2_BOTH_SAMPLE_TYPE_SAMPLE_DATA_MISSING_PARENTAL_WGS, inheritance_mode=inheritance_mode, **COMP_HET_ALL_PASS_FILTERS, intervals=[variant2_interval] ) - # Genome passes quality and inheritance exome fails inheritance (parental data shows variant is inherited). + # Genome passes quality and inheritance but exome fails inheritance (parental data shows variant is inherited). + # Variant is excluded from search results. inheritance_mode = 'de_novo' await self._assert_expected_search( - [VARIANT2_BOTH_SAMPLE_TYPES], sample_data=FAMILY_2_BOTH_SAMPLE_TYPE_SAMPLE_DATA_MISSING_PARENTAL_WGS, + [], sample_data=FAMILY_2_BOTH_SAMPLE_TYPE_SAMPLE_DATA_MISSING_PARENTAL_WGS, inheritance_mode=inheritance_mode, intervals=[variant2_interval] ) From 8d9d25d2402e3c01195df8f56fde6a290fc84f3e Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Wed, 23 Oct 2024 14:38:54 -0400 Subject: [PATCH 05/18] f string --- hail_search/definitions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hail_search/definitions.py b/hail_search/definitions.py index eac11d7f0e..cfa3ee9cfa 100644 --- a/hail_search/definitions.py +++ b/hail_search/definitions.py @@ -15,8 +15,8 @@ def family_entries_field(self) -> str: @property def failed_family_sample_field(self) -> str: return { - SampleType.WES: f'wes_failed_family_sample_indices', - SampleType.WGS: f'wgs_failed_family_sample_indices', + SampleType.WES: 'wes_failed_family_sample_indices', + SampleType.WGS: 'wgs_failed_family_sample_indices', }[self] @property From 877abbc8cb1d3ff5ff3428f6da2237b4f3b89f9d Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Mon, 28 Oct 2024 17:23:02 -0400 Subject: [PATCH 06/18] another pass --- hail_search/definitions.py | 11 +-- .../families/WGS/F000002_2.ht/.README.txt.crc | Bin 12 -> 12 bytes .../WGS/F000002_2.ht/.metadata.json.gz.crc | Bin 12 -> 12 bytes .../families/WGS/F000002_2.ht/README.txt | 4 +- .../.index.crc | Bin .../.metadata.json.gz.crc | Bin .../index | Bin .../metadata.json.gz | Bin .../WGS/F000002_2.ht/metadata.json.gz | Bin 352 -> 352 bytes .../F000002_2.ht/rows/.metadata.json.gz.crc | Bin 16 -> 16 bytes .../WGS/F000002_2.ht/rows/metadata.json.gz | Bin 627 -> 630 bytes ...0-57b69294-ffdb-4b39-842a-adf898b62d31.crc | Bin 0 -> 12 bytes ...0-5efaaf66-a01a-4640-a3e4-aef656269368.crc | Bin 12 -> 0 bytes ...art-0-57b69294-ffdb-4b39-842a-adf898b62d31 | Bin 0 -> 206 bytes ...art-0-5efaaf66-a01a-4640-a3e4-aef656269368 | Bin 206 -> 0 bytes hail_search/queries/base.py | 52 +++++++----- hail_search/queries/mito.py | 80 ++++++++---------- hail_search/test_utils.py | 2 +- 18 files changed, 70 insertions(+), 79 deletions(-) rename hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/index/{part-0-5efaaf66-a01a-4640-a3e4-aef656269368.idx => part-0-57b69294-ffdb-4b39-842a-adf898b62d31.idx}/.index.crc (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/index/{part-0-5efaaf66-a01a-4640-a3e4-aef656269368.idx => part-0-57b69294-ffdb-4b39-842a-adf898b62d31.idx}/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/index/{part-0-5efaaf66-a01a-4640-a3e4-aef656269368.idx => part-0-57b69294-ffdb-4b39-842a-adf898b62d31.idx}/index (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/index/{part-0-5efaaf66-a01a-4640-a3e4-aef656269368.idx => part-0-57b69294-ffdb-4b39-842a-adf898b62d31.idx}/metadata.json.gz (100%) create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/parts/.part-0-57b69294-ffdb-4b39-842a-adf898b62d31.crc delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/parts/.part-0-5efaaf66-a01a-4640-a3e4-aef656269368.crc create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/parts/part-0-57b69294-ffdb-4b39-842a-adf898b62d31 delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/parts/part-0-5efaaf66-a01a-4640-a3e4-aef656269368 diff --git a/hail_search/definitions.py b/hail_search/definitions.py index cfa3ee9cfa..7cc97d1a12 100644 --- a/hail_search/definitions.py +++ b/hail_search/definitions.py @@ -15,15 +15,8 @@ def family_entries_field(self) -> str: @property def failed_family_sample_field(self) -> str: return { - SampleType.WES: 'wes_failed_family_sample_indices', - SampleType.WGS: 'wgs_failed_family_sample_indices', - }[self] - - @property - def passes_inheritance_field(self) -> str: - return { - SampleType.WES: 'wes_passes_inheritance', - SampleType.WGS: 'wgs_passes_inheritance', + SampleType.WES: 'wes_failed_family_sample_guids', + SampleType.WGS: 'wgs_failed_family_sample_guids', }[self] @property diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/.README.txt.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/.README.txt.crc index f7799b94778f02019a04657be1cc91ebeb101949..b774b2827b5f9698c8ceb8bcb104c1f48138e580 100644 GIT binary patch literal 12 TcmYc;N@ieSU}8v`{4o{)695B# literal 12 TcmYc;N@ieSU}6yY%H|FL5ds2{ diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/.metadata.json.gz.crc index fee8fcb4687e14806f37b6d919092dba23677b46..5f2f901b97131b2aeb5048cac56077d681ae57cb 100644 GIT binary patch literal 12 TcmYc;N@ieSU}6YqUfaC{^&6Ole)K!)rG!l7c+0+Y$ZGizTcgr<*rR$q)5JmS3}|LslBn zW~k>$cD%<_9ZLV-*TC4MxpBF}Pne{tXwT3J7F{W7Dz~zZUk~EiW0VHu;mU9h+n#GG j%dN|0>O}blC1ix*?7(}pwf)V{(6str>M&i?g#rKoUI(f! delta 337 zcmV-X0j~bw0^kCW7=KQjAH8#!nPt0hy6nafQp!=uDs9>_LrDMo4hTBE+6^i1dCt@J zKt;Bhg!l)H%~VdE_Kc=ZAVCtFL+3A!`ky z8R~hG9q%z!$4dYAH83`5Zd~T@6UIUn?HO9ZvMYs6p*BIObe$bK_B^WN;Ub`M0*XCOYg6AWH}O@H3Zwh&sxAj>D%6aSIK zNw`b#oyIZ5kdg(4!h}Jn6N!H)E6n#>V52I4S)d_|d>IEcg_)AOP#yAeehUf{q4-*3 zTu`|;wn@qrPXr=&LfKYk9D5@$l#=cr%hZJt@mPUO{gVI)B{@`qX1~b~zbI@B1AlxQ zCsa9o@(_}i@*zV_wV9^W{xmefaW}qspq_&g}2UfoZux9WqF`;W2BZ1W5Y{8MKmC8l3UJnBWWswsX4dSE^XP z^ul#04Cr%rWu|{9f@`;RG~AU~ZaxX|163Quudcr4=^BlJM!Cga<-6vdM_i_A$=OR` zDx5FcyJe_47bE^VdNwnXC`v(9OsEivqQ}*Z9@%;S)lwQlT6WR`E;Vb#>V#8=%?B|c zewc#B-_bX3GgRJBf;exX+_6ji{$bhK>{BBa$h(`aXd@v^Bs)*)LLXKWXnF5cc~^H= z_2o`IC*?mvKB9SXXm31`@)^|AX;;aJF21Ew-Ew`<A8RonU(Xfuss6Xko=I0v`$wW7lvaO{7zLnE2*( z-}bWlqJjN~Bg_~Q+;Refy|RvH4ouT=opz}?=1lTRT!itCp}BKvs2wV7tea>g(`u+v QQuoTTe|WRzO(zKe0M|P#UH||9 literal 627 zcmV-(0*w71iwFP!000000NqwiZ{jcz{VzW4YLk`_upl==DMG4NT|j%-3L!U{fVZiG z9J4Bm^4~kocLJ44rC!PbC2z(v?~Pw%ABkYkKpZj?KD>ZG|9!LAf!8GlSwF#nI1eO@ zynT%CEDRxpn5;15MLu{skvPYq!t$^Ke_RGI3p9k09}}0wuuyXEsbgBsX+T~i6yIo! z3nmZ7E{eJ0kw9c7l-*5?yf*?PDcRwnNL?8L4;9GRISGJJl0zA&_nZE5vcks5btboA z#1!orH}?fPUFJ{8D$zRr3qDk2j5GASu;%JVpEY@fGV+qSDBeJDhbv2 zlyt^eKPU6J4+4;_+22iEt2=P3kU_jH5B*w-i?pARK_lr=gA4v26KKD0)6->tV&$hx zFIqtk^GOKrnc5M_+%k@EzXTN?iJ7ymO#sW%B*0Gq`qrHyF zZ0MN`0@|ltI<)B^y%+qQGqC|T+6!N=KayBs1uZPQMc_l`VeIN{rG<0~4-?1g^z30$ zT{N)&Xp9*nf?G~N`0uPUwEC7s2eesgi8<4>5*J>$_tD%bHPj9zHqlKql4vzdDXDuU N$tx--88asd000g{B9{OF diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/parts/.part-0-57b69294-ffdb-4b39-842a-adf898b62d31.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/parts/.part-0-57b69294-ffdb-4b39-842a-adf898b62d31.crc new file mode 100644 index 0000000000000000000000000000000000000000..7b6b76d406cc2627893358ce61707360c0866abe GIT binary patch literal 12 TcmYc;N@ieSU}AVF>}&!65*Grl literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/parts/.part-0-5efaaf66-a01a-4640-a3e4-aef656269368.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/parts/.part-0-5efaaf66-a01a-4640-a3e4-aef656269368.crc deleted file mode 100644 index d4dcde073f96bc3f5e9908e425b67b6ab38d26da..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}E^a_mM3C6{G|b diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/parts/part-0-57b69294-ffdb-4b39-842a-adf898b62d31 b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/parts/part-0-57b69294-ffdb-4b39-842a-adf898b62d31 new file mode 100644 index 0000000000000000000000000000000000000000..57088cd43339b536d15c8ceb6a6793a3753cf5ca GIT binary patch literal 206 zcmdnVz`*bWh&8tA|5f;r%F0l}!N|apoKa+WJdlaW(V5Yafti6pgu#Y|A(=sf=Yc(! z$l+tVg_sya7~M4#pV%vWuvcmPctM+q(LGsz;og>SLbvRtc^HywukkKBk$%@+mF0uI z22b1ei`tCRVS&LxAw`LK$r-7^egT%2dX}ao`Gy7tdIko}x-1M5EDTmPQi(g43m&tV z^CxC4V^N}rFMyuz Date: Mon, 28 Oct 2024 17:26:01 -0400 Subject: [PATCH 07/18] minor things --- hail_search/queries/mito.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/hail_search/queries/mito.py b/hail_search/queries/mito.py index 52af6212d4..b14191a64d 100644 --- a/hail_search/queries/mito.py +++ b/hail_search/queries/mito.py @@ -255,7 +255,6 @@ def _apply_multi_sample_type_entry_filters(self, ht, family_idx_map): if ht is None: return ht - # Keep family from both sample types if either passes quality AND inheritance for sample_type in SampleType: ht = self._apply_quality_entry_filters(ht, sample_type, family_idx_map) ht = self._apply_inheritance_entry_filters(ht, sample_type, family_idx_map) @@ -318,13 +317,6 @@ def _apply_inheritance_entry_filters(ht, sample_type, family_idx_map): ) return ht - @staticmethod - def _family_sample_has_valid_inheritance(ht, sample_type, family_idx, sample_idx): - return hl.or_missing( - hl.is_defined(family_idx) & hl.is_defined(sample_idx), - ~ht[sample_type.failed_family_sample_field][family_idx].contains(sample_idx), - ) - def _get_sample_genotype(self, samples, r=None, include_genotype_overrides=False, select_fields=None, **kwargs): if not self._has_both_sample_types: return super()._get_sample_genotype(samples, r, include_genotype_overrides, select_fields) From 773ba0cacdeca218d9e7e739e6c7603d4e8190a1 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Mon, 28 Oct 2024 20:40:34 -0400 Subject: [PATCH 08/18] less code --- hail_search/queries/mito.py | 65 ++++++++++++++----------------------- 1 file changed, 24 insertions(+), 41 deletions(-) diff --git a/hail_search/queries/mito.py b/hail_search/queries/mito.py index b14191a64d..1b7558399a 100644 --- a/hail_search/queries/mito.py +++ b/hail_search/queries/mito.py @@ -205,7 +205,7 @@ def _filter_entries_ht_both_sample_types( ) ch_ht = None - family_guid_idx_map = defaultdict(dict) + family_idx_map = defaultdict(dict) for sample_type, sorted_family_sample_data in sample_types: ht, ch_ht = self._filter_inheritance( ht, ch_ht, inheritance_filter, sorted_family_sample_data, @@ -214,11 +214,11 @@ def _filter_entries_ht_both_sample_types( ) for family_idx, samples in enumerate(sorted_family_sample_data): family_guid = samples[0]['familyGuid'] - family_guid_idx_map[family_guid][sample_type.value] = family_idx + family_idx_map[family_guid][sample_type.value] = family_idx - family_guid_idx_map = hl.dict(family_guid_idx_map) - ht = self._apply_multi_sample_type_entry_filters(ht, family_guid_idx_map) - ch_ht = self._apply_multi_sample_type_entry_filters(ch_ht, family_guid_idx_map) + family_idx_map = hl.dict(family_idx_map) + ht = self._apply_multi_sample_type_entry_filters(ht, family_idx_map) + ch_ht = self._apply_multi_sample_type_entry_filters(ch_ht, family_idx_map) return ht, ch_ht def _annotate_failed_family_samples_inheritance( @@ -256,8 +256,18 @@ def _apply_multi_sample_type_entry_filters(self, ht, family_idx_map): return ht for sample_type in SampleType: - ht = self._apply_quality_entry_filters(ht, sample_type, family_idx_map) - ht = self._apply_inheritance_entry_filters(ht, sample_type, family_idx_map) + ht = ht.annotate(**{ + sample_type.family_entries_field: hl.enumerate(ht[sample_type.family_entries_field]).starmap( + lambda family_idx, family_samples: hl.or_missing( + hl.bind(lambda other_sample_type_family_idx: (( + self._family_has_valid_quality(ht, sample_type, family_idx) | + self._family_has_valid_quality(ht, sample_type.other_sample_type, other_sample_type_family_idx) + ) & + self._family_has_valid_inheritance(ht, sample_type, family_idx, other_sample_type_family_idx) & + self._family_has_valid_inheritance(ht, sample_type.other_sample_type, other_sample_type_family_idx, family_idx) + ), family_idx_map.get(hl.coalesce(family_samples)[0]['familyGuid']).get(sample_type.other_sample_type.value), + ), family_samples) + )}) # Merge family entries and filters from both sample types ht = ht.transmute( @@ -273,17 +283,6 @@ def _apply_multi_sample_type_entry_filters(self, ht, family_idx_map): # Filter out families with no valid entries in either sample type return ht.filter(ht.family_entries.any(hl.is_defined)) - def _apply_quality_entry_filters(self, ht, sample_type, family_idx_map): - return ht.annotate(**{ - sample_type.family_entries_field: hl.enumerate(ht[sample_type.family_entries_field]).starmap( - lambda i, family_samples: hl.or_missing( - hl.bind(lambda other_sample_type_idx: ( - self._family_has_valid_quality(ht, sample_type, i) | - self._family_has_valid_quality(ht, sample_type.other_sample_type, other_sample_type_idx) - ), family_idx_map.get(hl.coalesce(family_samples)[0]['familyGuid']).get(sample_type.other_sample_type.value), - ), family_samples) - )}) - @staticmethod def _family_has_valid_quality(ht, sample_type, sample_type_family_idx): return ( @@ -292,30 +291,14 @@ def _family_has_valid_quality(ht, sample_type, sample_type_family_idx): ) @staticmethod - def _apply_inheritance_entry_filters(ht, sample_type, family_idx_map): - ht = ht.annotate( - **{sample_type.family_entries_field: hl.enumerate(ht[sample_type.family_entries_field]).starmap( - lambda family_idx, family_samples: hl.or_missing( - hl.bind(lambda other_sample_type_family_idx: ( - hl.bind( - lambda other_sample_type_pass_samples, sample_type_pass_samples: ( - ht[sample_type.failed_family_sample_field][family_idx].all( - other_sample_type_pass_samples.contains - ) & ht[sample_type.other_sample_type.failed_family_sample_field][other_sample_type_family_idx].all( - sample_type_pass_samples.contains - )), - ht[sample_type.other_sample_type.family_entries_field][other_sample_type_family_idx].filter( - lambda s: ~ht[sample_type.other_sample_type.failed_family_sample_field][other_sample_type_family_idx].contains(s['sampleId']) - ).map(lambda s: s['sampleId']), - ht[sample_type.other_sample_type.family_entries_field][family_idx].filter( - lambda s: ~ht[sample_type.failed_family_sample_field][family_idx].contains(s['sampleId']) - ).map(lambda s: s['sampleId']), - ) - ), family_idx_map.get(hl.coalesce(family_samples)[0]['familyGuid']).get(sample_type.other_sample_type.value) - ), family_samples) - )} + def _family_has_valid_inheritance(ht, sample_type, family_idx, other_sample_type_family_idx): + return hl.bind( + lambda other_sample_type_pass_samples: ( + ht[sample_type.failed_family_sample_field][family_idx].all(other_sample_type_pass_samples.contains) + ), ht[sample_type.other_sample_type.family_entries_field][other_sample_type_family_idx].filter( + lambda s: ~ht[sample_type.other_sample_type.failed_family_sample_field][other_sample_type_family_idx].contains(s['sampleId']) + ).map(lambda s: s['sampleId']), ) - return ht def _get_sample_genotype(self, samples, r=None, include_genotype_overrides=False, select_fields=None, **kwargs): if not self._has_both_sample_types: From 18d1d6380ecf58948becbf0d8f7e839297e79701 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Thu, 31 Oct 2024 13:51:36 -0400 Subject: [PATCH 09/18] reuse as much code from _annotate_families_inheritance --- hail_search/queries/base.py | 35 +++++++++++++++++------------- hail_search/queries/mito.py | 43 ++++++++++++++++--------------------- 2 files changed, 38 insertions(+), 40 deletions(-) diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index 39836f3915..3890fd88f0 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -472,9 +472,7 @@ def _apply_entry_filters(ht): def _filter_single_entries_table(self, ht, project_families, inheritance_filter=None, quality_filter=None, is_merged_ht=False, **kwargs): ht, sorted_family_sample_data = self._add_entry_sample_families(ht, project_families, is_merged_ht) ht = self._filter_quality(ht, quality_filter, **kwargs) - ht, ch_ht = self._filter_inheritance( - ht, None, inheritance_filter, sorted_family_sample_data, self._annotate_families_inheritance - ) + ht, ch_ht = self._filter_inheritance(ht, None, inheritance_filter, sorted_family_sample_data) ht = self._apply_entry_filters(ht) ch_ht = self._apply_entry_filters(ch_ht) @@ -573,8 +571,8 @@ def _get_sample_type(cls, family_index, ht_globals): return ht_globals.sample_type def _filter_inheritance( - self, ht, comp_het_ht, inheritance_filter, sorted_family_sample_data, annotate_func, - annotation='family_entries', entries_ht_field='family_entries' + self, ht, comp_het_ht, inheritance_filter, sorted_family_sample_data, + annotation='family_entries', entries_ht_field='family_entries', **kwargs ): any_valid_entry = lambda x: self.GENOTYPE_QUERY_MAP[HAS_ALT](x.GT) @@ -589,26 +587,29 @@ def _filter_inheritance( )}) if self._has_comp_het_search: - comp_het_ht = annotate_func( + comp_het_ht = self._annotate_families_inheritance( comp_het_ht if comp_het_ht is not None else ht, COMPOUND_HET, inheritance_filter, - sorted_family_sample_data, annotation, entries_ht_field + sorted_family_sample_data, annotation, entries_ht_field, **kwargs ) if is_any_affected or not (inheritance_filter or self._inheritance_mode): # No sample-specific inheritance filtering needed sorted_family_sample_data = [] - ht = None if self._inheritance_mode == COMPOUND_HET else annotate_func( + ht = None if self._inheritance_mode == COMPOUND_HET else self._annotate_families_inheritance( ht, self._inheritance_mode, inheritance_filter, sorted_family_sample_data, - annotation, entries_ht_field + annotation, entries_ht_field, **kwargs ) return ht, comp_het_ht def _annotate_families_inheritance( self, ht, inheritance_mode, inheritance_filter, sorted_family_sample_data, - annotation, entries_ht_field, + annotation, entries_ht_field, family_passes_inheritance_filter = None ): + if not family_passes_inheritance_filter: + family_passes_inheritance_filter = self._get_family_passes_inheritance_filter + entry_indices_by_gt = self._get_entry_indices_by_gt_map( inheritance_filter, inheritance_mode, sorted_family_sample_data ) @@ -619,11 +620,9 @@ def _annotate_families_inheritance( entry_indices = hl.dict(entry_indices) ht = ht.annotate(**{ annotation: hl.enumerate(ht[entries_ht_field]).starmap( - lambda family_i, family_samples: hl.or_missing( - ~entry_indices.contains(family_i) | entry_indices[family_i].all( - lambda sample_i: self.GENOTYPE_QUERY_MAP[genotype](family_samples[sample_i].GT) - ), family_samples, - ), + lambda family_idx, family_samples: family_passes_inheritance_filter( + entry_indices, family_idx, genotype, family_samples, ht, annotation + ) ) }) @@ -653,6 +652,12 @@ def _get_entry_indices_by_gt_map(self, inheritance_filter, inheritance_mode, sor return entry_indices_by_gt + def _get_family_passes_inheritance_filter(self, entry_indices, family_idx, genotype, family_samples, *args): + return hl.or_missing( + ~entry_indices.contains(family_idx) | entry_indices[family_idx].all( + lambda sample_i: self.GENOTYPE_QUERY_MAP[genotype](family_samples[sample_i].GT) + ), family_samples) + def _get_family_passes_quality_filter(self, quality_filter, ht, **kwargs): quality_filter = quality_filter or {} diff --git a/hail_search/queries/mito.py b/hail_search/queries/mito.py index 1b7558399a..499469b02c 100644 --- a/hail_search/queries/mito.py +++ b/hail_search/queries/mito.py @@ -207,10 +207,13 @@ def _filter_entries_ht_both_sample_types( ch_ht = None family_idx_map = defaultdict(dict) for sample_type, sorted_family_sample_data in sample_types: + ht = self._annotate_empty_failed_inheritance(ht, sample_type) + ch_ht = self._annotate_empty_failed_inheritance(ch_ht, sample_type) + ht, ch_ht = self._filter_inheritance( ht, ch_ht, inheritance_filter, sorted_family_sample_data, - annotate_func=self._annotate_failed_family_samples_inheritance, annotation=sample_type.failed_family_sample_field, entries_ht_field=sample_type.family_entries_field, + family_passes_inheritance_filter=self._get_family_passes_inheritance_filter_both_sample_types ) for family_idx, samples in enumerate(sorted_family_sample_data): family_guid = samples[0]['familyGuid'] @@ -221,35 +224,25 @@ def _filter_entries_ht_both_sample_types( ch_ht = self._apply_multi_sample_type_entry_filters(ch_ht, family_idx_map) return ht, ch_ht - def _annotate_failed_family_samples_inheritance( - self, ht, inheritance_mode, inheritance_filter, sorted_family_sample_data, annotation, entries_ht_field - ): - entry_indices_by_gt = self._get_entry_indices_by_gt_map( - inheritance_filter, inheritance_mode, sorted_family_sample_data - ) - + @staticmethod + def _annotate_empty_failed_inheritance(ht, sample_type): if ht is None: return ht - # Initialize empty array - ht = ht.annotate(**{annotation: ht[entries_ht_field].map(lambda x: hl.empty_array(hl.tstr))}) - - # Add failed genotype samples - for genotype, entry_indices in entry_indices_by_gt.items(): - if not entry_indices: - continue - - entry_indices = hl.dict(entry_indices) - ht = ht.annotate(**{annotation: hl.enumerate(ht[entries_ht_field]).starmap( - lambda family_idx, entries: hl.bind( - lambda failed_samples: ht[annotation][family_idx].extend(failed_samples), - entry_indices.get(family_idx).filter( - lambda sample_idx: ~self.GENOTYPE_QUERY_MAP[genotype](entries[sample_idx].GT) - ).map(lambda sample_idx: entries[sample_idx]['sampleId']) - ) + return ht.annotate(**{ + sample_type.failed_family_sample_field: ht[sample_type.family_entries_field].map( + lambda x: hl.empty_array(hl.tstr) )}) - return ht + def _get_family_passes_inheritance_filter_both_sample_types( + self, entry_indices, family_idx, genotype, family_samples, ht, annotation + ): + return hl.bind( + lambda failed_samples: ht[annotation][family_idx].extend(failed_samples), + entry_indices.get(family_idx).filter( + lambda sample_idx: ~self.GENOTYPE_QUERY_MAP[genotype](family_samples[sample_idx].GT) + ).map(lambda sample_idx: family_samples[sample_idx]['sampleId']) + ) def _apply_multi_sample_type_entry_filters(self, ht, family_idx_map): if ht is None: From 4d58e03571250229e9980b6b83a5b732c7aae71c Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Mon, 4 Nov 2024 11:57:12 -0500 Subject: [PATCH 10/18] rstructure passes inheritance field --- hail_search/definitions.py | 6 ++--- hail_search/queries/base.py | 12 +++++----- hail_search/queries/mito.py | 45 ++++++++++++++++++++----------------- 3 files changed, 34 insertions(+), 29 deletions(-) diff --git a/hail_search/definitions.py b/hail_search/definitions.py index 7cc97d1a12..bf8baaec2c 100644 --- a/hail_search/definitions.py +++ b/hail_search/definitions.py @@ -13,10 +13,10 @@ def family_entries_field(self) -> str: }[self] @property - def failed_family_sample_field(self) -> str: + def passes_inheritance_field(self) -> str: return { - SampleType.WES: 'wes_failed_family_sample_guids', - SampleType.WGS: 'wgs_failed_family_sample_guids', + SampleType.WES: 'wes_passes_inheritance', + SampleType.WGS: 'wgs_passes_quality', }[self] @property diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index 3890fd88f0..0da165bcb4 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -628,6 +628,12 @@ def _annotate_families_inheritance( return ht + def _get_family_passes_inheritance_filter(self, entry_indices, family_idx, genotype, family_samples, *args): + return hl.or_missing( + ~entry_indices.contains(family_idx) | entry_indices[family_idx].all( + lambda sample_i: self.GENOTYPE_QUERY_MAP[genotype](family_samples[sample_i].GT) + ), family_samples) + def _get_entry_indices_by_gt_map(self, inheritance_filter, inheritance_mode, sorted_family_sample_data): individual_genotype_filter = (inheritance_filter or {}).get('genotype') @@ -652,12 +658,6 @@ def _get_entry_indices_by_gt_map(self, inheritance_filter, inheritance_mode, sor return entry_indices_by_gt - def _get_family_passes_inheritance_filter(self, entry_indices, family_idx, genotype, family_samples, *args): - return hl.or_missing( - ~entry_indices.contains(family_idx) | entry_indices[family_idx].all( - lambda sample_i: self.GENOTYPE_QUERY_MAP[genotype](family_samples[sample_i].GT) - ), family_samples) - def _get_family_passes_quality_filter(self, quality_filter, ht, **kwargs): quality_filter = quality_filter or {} diff --git a/hail_search/queries/mito.py b/hail_search/queries/mito.py index 499469b02c..419ac5ee6f 100644 --- a/hail_search/queries/mito.py +++ b/hail_search/queries/mito.py @@ -207,12 +207,11 @@ def _filter_entries_ht_both_sample_types( ch_ht = None family_idx_map = defaultdict(dict) for sample_type, sorted_family_sample_data in sample_types: - ht = self._annotate_empty_failed_inheritance(ht, sample_type) - ch_ht = self._annotate_empty_failed_inheritance(ch_ht, sample_type) - + ht = self._annotate_initial_passes_inheritance(ht, sample_type) + ch_ht = self._annotate_initial_passes_inheritance(ch_ht, sample_type) ht, ch_ht = self._filter_inheritance( ht, ch_ht, inheritance_filter, sorted_family_sample_data, - annotation=sample_type.failed_family_sample_field, entries_ht_field=sample_type.family_entries_field, + annotation=sample_type.passes_inheritance_field, entries_ht_field=sample_type.family_entries_field, family_passes_inheritance_filter=self._get_family_passes_inheritance_filter_both_sample_types ) for family_idx, samples in enumerate(sorted_family_sample_data): @@ -225,23 +224,25 @@ def _filter_entries_ht_both_sample_types( return ht, ch_ht @staticmethod - def _annotate_empty_failed_inheritance(ht, sample_type): + def _annotate_initial_passes_inheritance(ht, sample_type): if ht is None: return ht return ht.annotate(**{ - sample_type.failed_family_sample_field: ht[sample_type.family_entries_field].map( - lambda x: hl.empty_array(hl.tstr) + sample_type.passes_inheritance_field: ht[sample_type.family_entries_field].map( + lambda family_entries: hl.array( + hl.range(0, hl.len(family_entries)).map(lambda _: True) + ) )}) def _get_family_passes_inheritance_filter_both_sample_types( self, entry_indices, family_idx, genotype, family_samples, ht, annotation ): - return hl.bind( - lambda failed_samples: ht[annotation][family_idx].extend(failed_samples), - entry_indices.get(family_idx).filter( - lambda sample_idx: ~self.GENOTYPE_QUERY_MAP[genotype](family_samples[sample_idx].GT) - ).map(lambda sample_idx: family_samples[sample_idx]['sampleId']) + return hl.enumerate(ht[annotation][family_idx]).starmap( + lambda sample_idx, passes: (hl.case() + .when(~entry_indices.get(family_idx).contains(sample_idx), passes) + .when(~self.GENOTYPE_QUERY_MAP[genotype](family_samples[sample_idx].GT), False) + .default(passes)) ) def _apply_multi_sample_type_entry_filters(self, ht, family_idx_map): @@ -283,16 +284,20 @@ def _family_has_valid_quality(ht, sample_type, sample_type_family_idx): hl.is_defined(ht[sample_type.passes_quality_field][sample_type_family_idx]) ) - @staticmethod - def _family_has_valid_inheritance(ht, sample_type, family_idx, other_sample_type_family_idx): - return hl.bind( - lambda other_sample_type_pass_samples: ( - ht[sample_type.failed_family_sample_field][family_idx].all(other_sample_type_pass_samples.contains) - ), ht[sample_type.other_sample_type.family_entries_field][other_sample_type_family_idx].filter( - lambda s: ~ht[sample_type.other_sample_type.failed_family_sample_field][other_sample_type_family_idx].contains(s['sampleId']) - ).map(lambda s: s['sampleId']), + def _family_has_valid_inheritance(self, ht, sample_type, family_idx, other_sample_type_family_idx): + return self._get_passes_inheritance_samples(ht, sample_type, family_idx).all( + self._get_passes_inheritance_samples(ht, sample_type.other_sample_type, other_sample_type_family_idx).contains ) + @staticmethod + def _get_passes_inheritance_samples(ht, sample_type, family_idx): + return hl.enumerate(ht[sample_type.family_entries_field][family_idx]).starmap( + lambda sample_idx, sample: hl.or_missing( + ht[sample_type.passes_inheritance_field][family_idx][sample_idx], + sample['sampleId'], + ) + ).filter(hl.is_defined) + def _get_sample_genotype(self, samples, r=None, include_genotype_overrides=False, select_fields=None, **kwargs): if not self._has_both_sample_types: return super()._get_sample_genotype(samples, r, include_genotype_overrides, select_fields) From e1edb07d4c5fba2095ba23e5bd62d7c96888e790 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Mon, 4 Nov 2024 12:34:31 -0500 Subject: [PATCH 11/18] fix _family_has_valid_inheritance --- hail_search/queries/mito.py | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/hail_search/queries/mito.py b/hail_search/queries/mito.py index 419ac5ee6f..73263e8098 100644 --- a/hail_search/queries/mito.py +++ b/hail_search/queries/mito.py @@ -284,19 +284,24 @@ def _family_has_valid_quality(ht, sample_type, sample_type_family_idx): hl.is_defined(ht[sample_type.passes_quality_field][sample_type_family_idx]) ) - def _family_has_valid_inheritance(self, ht, sample_type, family_idx, other_sample_type_family_idx): - return self._get_passes_inheritance_samples(ht, sample_type, family_idx).all( - self._get_passes_inheritance_samples(ht, sample_type.other_sample_type, other_sample_type_family_idx).contains - ) - @staticmethod - def _get_passes_inheritance_samples(ht, sample_type, family_idx): - return hl.enumerate(ht[sample_type.family_entries_field][family_idx]).starmap( - lambda sample_idx, sample: hl.or_missing( - ht[sample_type.passes_inheritance_field][family_idx][sample_idx], - sample['sampleId'], - ) - ).filter(hl.is_defined) + def _family_has_valid_inheritance(ht, sample_type, family_idx, other_sample_type_family_idx): + return hl.bind( + lambda sample_type_fail_samples, other_sample_type_pass_samples: ( + sample_type_fail_samples.all(other_sample_type_pass_samples.contains) + ), hl.enumerate(ht[sample_type.family_entries_field][family_idx]).starmap( + lambda sample_idx, sample: hl.or_missing( + ~ht[sample_type.passes_inheritance_field][family_idx][sample_idx], + sample['sampleId'], + ) + ).filter(hl.is_defined), + hl.enumerate(ht[sample_type.other_sample_type.family_entries_field][other_sample_type_family_idx]).starmap( + lambda sample_idx, sample: hl.or_missing( + ht[sample_type.other_sample_type.passes_inheritance_field][other_sample_type_family_idx][sample_idx], + sample['sampleId'], + ) + ).filter(hl.is_defined), + ) def _get_sample_genotype(self, samples, r=None, include_genotype_overrides=False, select_fields=None, **kwargs): if not self._has_both_sample_types: From d8063324e0b8c36b46158b030f8e14c73d428361 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Mon, 4 Nov 2024 14:32:57 -0500 Subject: [PATCH 12/18] fix up test cases --- .../families/WGS/F000002_2.ht/.README.txt.crc | Bin 12 -> 12 bytes .../WGS/F000002_2.ht/.metadata.json.gz.crc | Bin 12 -> 12 bytes .../families/WGS/F000002_2.ht/README.txt | 4 +- .../.index.crc | Bin .../.metadata.json.gz.crc | Bin .../index | Bin .../metadata.json.gz | Bin .../WGS/F000002_2.ht/metadata.json.gz | Bin 352 -> 352 bytes .../F000002_2.ht/rows/.metadata.json.gz.crc | Bin 16 -> 16 bytes .../WGS/F000002_2.ht/rows/metadata.json.gz | Bin 630 -> 628 bytes ...0-d68dd2a3-0a18-42d9-9d62-d4fc646610ac.crc | Bin 0 -> 12 bytes ...art-0-d68dd2a3-0a18-42d9-9d62-d4fc646610ac | Bin 0 -> 207 bytes .../WGS/F000002_2_old.ht/.README.txt.crc | Bin 0 -> 12 bytes .../WGS/F000002_2_old.ht/._SUCCESS.crc | Bin 0 -> 8 bytes .../F000002_2_old.ht/.metadata.json.gz.crc | Bin 0 -> 12 bytes .../families/WGS/F000002_2_old.ht/README.txt | 3 + .../families/WGS/F000002_2_old.ht/_SUCCESS | 0 .../globals/.metadata.json.gz.crc | Bin 0 -> 12 bytes .../F000002_2_old.ht/globals/metadata.json.gz | Bin 0 -> 289 bytes .../globals/parts/.part-0.crc | Bin 0 -> 12 bytes .../WGS/F000002_2_old.ht/globals/parts/part-0 | Bin 0 -> 123 bytes .../.index.crc | Bin 0 -> 12 bytes .../.metadata.json.gz.crc | Bin 0 -> 12 bytes .../index | Bin 0 -> 111 bytes .../metadata.json.gz | Bin 0 -> 184 bytes .../WGS/F000002_2_old.ht/metadata.json.gz | Bin 0 -> 352 bytes .../rows/.metadata.json.gz.crc | Bin 0 -> 16 bytes .../F000002_2_old.ht/rows/metadata.json.gz | Bin 0 -> 630 bytes ...0-57b69294-ffdb-4b39-842a-adf898b62d31.crc | Bin ...art-0-57b69294-ffdb-4b39-842a-adf898b62d31 | Bin hail_search/test_search.py | 39 ++++---- hail_search/test_utils.py | 91 +++++++++++++++--- 32 files changed, 103 insertions(+), 34 deletions(-) rename hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/index/{part-0-57b69294-ffdb-4b39-842a-adf898b62d31.idx => part-0-d68dd2a3-0a18-42d9-9d62-d4fc646610ac.idx}/.index.crc (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/index/{part-0-57b69294-ffdb-4b39-842a-adf898b62d31.idx => part-0-d68dd2a3-0a18-42d9-9d62-d4fc646610ac.idx}/.metadata.json.gz.crc (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/index/{part-0-57b69294-ffdb-4b39-842a-adf898b62d31.idx => part-0-d68dd2a3-0a18-42d9-9d62-d4fc646610ac.idx}/index (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/index/{part-0-57b69294-ffdb-4b39-842a-adf898b62d31.idx => part-0-d68dd2a3-0a18-42d9-9d62-d4fc646610ac.idx}/metadata.json.gz (100%) create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/parts/.part-0-d68dd2a3-0a18-42d9-9d62-d4fc646610ac.crc create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/parts/part-0-d68dd2a3-0a18-42d9-9d62-d4fc646610ac create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/.README.txt.crc create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/._SUCCESS.crc create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/.metadata.json.gz.crc create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/README.txt create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/_SUCCESS create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/globals/.metadata.json.gz.crc create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/globals/metadata.json.gz create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/globals/parts/.part-0.crc create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/globals/parts/part-0 create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/index/part-0-57b69294-ffdb-4b39-842a-adf898b62d31.idx/.index.crc create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/index/part-0-57b69294-ffdb-4b39-842a-adf898b62d31.idx/.metadata.json.gz.crc create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/index/part-0-57b69294-ffdb-4b39-842a-adf898b62d31.idx/index create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/index/part-0-57b69294-ffdb-4b39-842a-adf898b62d31.idx/metadata.json.gz create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/metadata.json.gz create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/rows/.metadata.json.gz.crc create mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/rows/metadata.json.gz rename hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/{F000002_2.ht => F000002_2_old.ht}/rows/parts/.part-0-57b69294-ffdb-4b39-842a-adf898b62d31.crc (100%) rename hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/{F000002_2.ht => F000002_2_old.ht}/rows/parts/part-0-57b69294-ffdb-4b39-842a-adf898b62d31 (100%) diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/.README.txt.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/.README.txt.crc index b774b2827b5f9698c8ceb8bcb104c1f48138e580..3b6f654d6c82ab84756a952e252b70193d3aaff9 100644 GIT binary patch literal 12 TcmYc;N@ieSU}AVIzG@->6E*{I literal 12 TcmYc;N@ieSU}8v`{4o{)695B# diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/.metadata.json.gz.crc index 5f2f901b97131b2aeb5048cac56077d681ae57cb..fee8fcb4687e14806f37b6d919092dba23677b46 100644 GIT binary patch literal 12 TcmYc;N@ieSU}E^xX(_LrDMo4hTBE+6^i1dCt@J zKt;Bhg!l)H%~VdE_Kc=ZAVCtFL+3A!`ky z8R~hG9q%z!$4dYAH83`5Zd~T@6UIUn?HO9ZvMYs6faC{^&6Ole)K!)rG!l7c+0+Y$ZGizTcgr<*rR$q)5JmS3}|LslBn zW~k>$cD%<_9ZLV-*TC4MxpBF}Pne{tXwT3J7F{W7Dz~zZUk~EiW0VHu;mU9h+n#GG j%dN|0>O}blC1ix*?7(}pwf)V{(6str>M&i?g#rKoUI(f! diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/.metadata.json.gz.crc index 06d9e98f8ef007ecd9a867ce19d2f47714973285..6a1017e8ebf299656da8b2b4dee4cd373e563000 100644 GIT binary patch literal 16 XcmYc;N@ieSU}8A2OZP$7&J9}uDVPRw literal 16 XcmYc;N@ieSU}6Xtxo{z3&dN;yBiIF% diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/metadata.json.gz b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/metadata.json.gz index 75aaf792a6baad3fc0bef4dc8622e662d3fffcc8..eeeb78776570901cc864e22a18bd0d1ed5646f88 100644 GIT binary patch literal 628 zcmV-)0*n10iwFP!000000NqwiZ{jcz{VzW4YLhMr5J7H0DMG4N-GcV86+*5(0dG?W zIc8NB<-d2F?*vvVm3k=$l)M?wyf=Q4eI$}T1M$gRa(DrL|Lc0O1LhKote@aO{09<7 zY#-x04MPYaCMyiFhyyc;#6K1lmWLhiNg2Q_Fc3z*O~*8bg;sl}k7+r-0kKGGu`!sC zR2{5c6bmgPiO5{)uA5kSZv;k4(ZfTLy0QWhYEZF%5&)@{fHF|;H~Z;lg{{%ppWcKK z)n3oKzAMn#GJi@|iP7;l*U?aLYyQ8??kC&s;A=@RYlewJZYpvM&?Q#zDl>CNC7~Lh zlFkI{=VTuDK>(^X``hW*aR=inWD&L%A+M#3k@gcZXe2!va3TI-g4geNz0+mCW6jg0 z7p_BLK%d(y6Z1n6!nm!aVP0UV`4ohARBsW#y851`8#DwOq!xRV@9KLV2o>wK;4hh} zaK32ohM}rljQAhu*~E&1AO>ABp;AVg=BsNxlJownq|}5o>|_L7YSxm~38xmDPhvp) zFeMFtpl{x1sI;E~dEP>)W7qio!@9NEr$)?>4>w)bMgfZyKTqmHAC?nnc<)qsTX$Ra zv#x8C270R!N8{zN1Rt2y@Wm*>7I#j?KLR=PvEq9qJ71p38^! zkbADpdja!$p67HZOYa4L?y20sIND2Y)*nf%v4Rnn-6HTI^DuU0Tjd~~!o$ROI=${N zsV*AWe>B025y34dAo&~X44uB?cmuClYKb|sw33i4+;cQ{N)5F`iA_xtgCs@`)moWe ON%9JAp*YYd2><}TI47I{ literal 630 zcmV-+0*U<}iwFP!000000NqwiZ`wc*{V#iJRg=axxVCSY5ELn@Qb0X4LTK@hVH;p*BIObe$bK_B^WN;Ub`M0*XCOYg6AWH}O@H3Zwh&sxAj>D%6aSIK zNw`b#oyIZ5kdg(4!h}Jn6N!H)E6n#>V52I4S)d_|d>IEcg_)AOP#yAeehUf{q4-*3 zTu`|;wn@qrPXr=&LfKYk9D5@$l#=cr%hZJt@mPUO{gVI)B{@`qX1~b~zbI@B1AlxQ zCsa9o@(_}i@*zV_wV9^W{xmefaW}qspq_&g}2UfoZux9WqF`;W2BZ1W5Y{8MKmC8l3UJnBWWswsX4dSE^XP z^ul#04Cr%rWu|{9f@`;RG~AU~ZaxX|163Quudcr4=^BlJM!Cga<-6vdM_i_A$=OR` zDx5FcyJe_47bE^VdNwnXC`v(9OsEivqQ}*Z9@%;S)lwQlT6WR`E;Vb#>V#8=%?B|c zewc#B-_bX3GgRJBf;exX+_6ji{$bhK>{BBa$h(`aXd@v^Bs)*)LLXKWXnF5cc~^H= z_2o`IC*?mvKB9SXXm31`@)^|AX;;aJF21Ew-Ew`<A8RonU(Xfuss6Xko=I0v`$wW7lvaO{7zLnE2*( z-}bWlqJjN~Bg_~Q+;Refy|RvH4ouT=opz}?=1lTRT!itCp}BKvs2wV7tea>g(`u+v QQuoTTe|WRzO(zKe0M|P#UH||9 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/parts/.part-0-d68dd2a3-0a18-42d9-9d62-d4fc646610ac.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/parts/.part-0-d68dd2a3-0a18-42d9-9d62-d4fc646610ac.crc new file mode 100644 index 0000000000000000000000000000000000000000..4155311e5a4dac9c8a79c47c1560d5917061165e GIT binary patch literal 12 TcmYc;N@ieSU}88fdg&Yh6HNoR literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/parts/part-0-d68dd2a3-0a18-42d9-9d62-d4fc646610ac b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/parts/part-0-d68dd2a3-0a18-42d9-9d62-d4fc646610ac new file mode 100644 index 0000000000000000000000000000000000000000..4317a07a18b36279fdb7e877ea6f0a02e24cf647 GIT binary patch literal 207 zcmdnRz`*bWh&8tA|5f;r%gWHg!N|apoKa+WJdlaW(V5Yafti6pgu#Y|A(=sf=Yc(! z$l+tVg_sya7~M4#pV%v~e6Uw({CGi|iP1e-f8pMiZ$h{1rFj^VYp?MxJCT0ZUKOZ7 zgQso#MQujuu)yG;kfOxA%2MHm|&VVcJw%Og9 z)TaO4843l#i#=`Mk7qxo1*-~z7+xHbBb>cGW)9N)kx1-}(nNX6f~$3Cy(+4lmIi%1O7ERh&76H>m1`DJU7-r}m$R=0CdLP~lTE(% zn_S|11nb6bj_8cSpq)Z{ZHHXd>ovKv>piE*&G;@kS`p|EljBJw$xFRu$gj{!#LRqi zU^iCVvVi(e-+ZsGBIpnVPt0aa5pe8H#RmhWOPk0w$j%xPAx3bO-U?CEKV&c z)-OpdE{RXfONmcO&CQPwGB7hWiZ944NX^O2OO4knOHR{EO0QxLcMoP@VeoAAn%ml- TGfh;57ibFzfNf%6WMBXQjJYDO literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/index/part-0-57b69294-ffdb-4b39-842a-adf898b62d31.idx/.index.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/index/part-0-57b69294-ffdb-4b39-842a-adf898b62d31.idx/.index.crc new file mode 100644 index 0000000000000000000000000000000000000000..fb65f202b7365f1ec2c4dee8517cc4c063161e7f GIT binary patch literal 12 TcmYc;N@ieSU}Bi4v_ljC5w8Nf literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/index/part-0-57b69294-ffdb-4b39-842a-adf898b62d31.idx/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/index/part-0-57b69294-ffdb-4b39-842a-adf898b62d31.idx/.metadata.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..4eb1dd4f7a5598fce8f9d708d58fd7ca59672cc9 GIT binary patch literal 12 TcmYc;N@ieSU}DHvTU`nO5|RTz literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/index/part-0-57b69294-ffdb-4b39-842a-adf898b62d31.idx/index b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/index/part-0-57b69294-ffdb-4b39-842a-adf898b62d31.idx/index new file mode 100644 index 0000000000000000000000000000000000000000..51f5b8d2767b5254f9395fb4341922a95fa01caf GIT binary patch literal 111 zcmeZZU|_HZVvVi(e--TSGBU6*Gcd5QBxe*E9uH(l1!ORkx$iTqJ2s8`DC;+X888!d_ literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/index/part-0-57b69294-ffdb-4b39-842a-adf898b62d31.idx/metadata.json.gz b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/index/part-0-57b69294-ffdb-4b39-842a-adf898b62d31.idx/metadata.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..caa522e8089ac6d2d2dba6e427e15c1bc315afa2 GIT binary patch literal 184 zcmV;p07w5HiwFP!0000009B5`3c@fDME_+^3ON)lwdN**9uyQ6FXAD_Z4(TMBwH|) z{<~XmUY3QKc{4pU*5MUnvcZF_s-mm~Jfia2Sl@1xrVv4v#j+A$(6)v^&IG(LU-BD4 z!kJ>HsdqtlN$$rx*`C&BUQdN`4jm@Bic#eo8<}_?K!EZ-kRq=9hm4xvVNTpA!1^bC mj*1K$em|=*1UfWLLY}eUNs5^I=m?z1j^a?|IJC_CQ6pxq##cjLnrKY&Z&oF(rGRi%UozdqJ-s1`os082a0% zU^AIa2qgx#U_ho|69XdA-0cy*q=-1aLA@(AVCA!ZsdhqbxL_-1iqut>A2bqqX4%vj zUam3EY$ZGizTcg zr<*rR$q)5JmS3}|LslBnW~k>$cD%<_9sl3gz}Td@ak;}!n53#`&(I1MT`6iRx3Z33 y58~Qmlm_JC%5Vp*BIObe$bK_B^WN;Ub`M0*XCOYg6AWH}O@H3Zwh&sxAj>D%6aSIK zNw`b#oyIZ5kdg(4!h}Jn6N!H)E6n#>V52I4S)d_|d>IEcg_)AOP#yAeehUf{q4-*3 zTu`|;wn@qrPXr=&LfKYk9D5@$l#=cr%hZJt@mPUO{gVI)B{@`qX1~b~zbI@B1AlxQ zCsa9o@(_}i@*zV_wV9^W{xmefaW}qspq_&g}2UfoZux9WqF`;W2BZ1W5Y{8MKmC8l3UJnBWWswsX4dSE^XP z^ul#04Cr%rWu|{9f@`;RG~AU~ZaxX|163Quudcr4=^BlJM!Cga<-6vdM_i_A$=OR` zDx5FcyJe_47bE^VdNwnXC`v(9OsEivqQ}*Z9@%;S)lwQlT6WR`E;Vb#>V#8=%?B|c zewc#B-_bX3GgRJBf;exX+_6ji{$bhK>{BBa$h(`aXd@v^Bs)*)LLXKWXnF5cc~^H= z_2o`IC*?mvKB9SXXm31`@)^|AX;;aJF21Ew-Ew`<A8RonU(Xfuss6Xko=I0v`$wW7lvaO{7zLnE2*( z-}bWlqJjN~Bg_~Q+;Refy|RvH4ouT=opz}?=1lTRT!itCp}BKvs2wV7tea>g(`u+v QQuoTTe|WRzO(zKe0M|P#UH||9 literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/parts/.part-0-57b69294-ffdb-4b39-842a-adf898b62d31.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/rows/parts/.part-0-57b69294-ffdb-4b39-842a-adf898b62d31.crc similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/parts/.part-0-57b69294-ffdb-4b39-842a-adf898b62d31.crc rename to hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/rows/parts/.part-0-57b69294-ffdb-4b39-842a-adf898b62d31.crc diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/parts/part-0-57b69294-ffdb-4b39-842a-adf898b62d31 b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/rows/parts/part-0-57b69294-ffdb-4b39-842a-adf898b62d31 similarity index 100% rename from hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2.ht/rows/parts/part-0-57b69294-ffdb-4b39-842a-adf898b62d31 rename to hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/rows/parts/part-0-57b69294-ffdb-4b39-842a-adf898b62d31 diff --git a/hail_search/test_search.py b/hail_search/test_search.py index cbc6be1c88..3593caabd5 100644 --- a/hail_search/test_search.py +++ b/hail_search/test_search.py @@ -13,7 +13,10 @@ FAMILY_2_MITO_SAMPLE_DATA, FAMILY_2_ALL_SAMPLE_DATA, MITO_VARIANT1, MITO_VARIANT2, MITO_VARIANT3, \ EXPECTED_SAMPLE_DATA_WITH_SEX, SV_WGS_SAMPLE_DATA_WITH_SEX, VARIANT_LOOKUP_VARIANT, \ MULTI_PROJECT_SAMPLE_TYPES_SAMPLE_DATA, FAMILY_2_BOTH_SAMPLE_TYPE_SAMPLE_DATA, \ - VARIANT1_BOTH_SAMPLE_TYPES, VARIANT2_BOTH_SAMPLE_TYPES, FAMILY_2_BOTH_SAMPLE_TYPE_SAMPLE_DATA_MISSING_PARENTAL_WGS + VARIANT1_BOTH_SAMPLE_TYPES, VARIANT2_BOTH_SAMPLE_TYPES, FAMILY_2_BOTH_SAMPLE_TYPE_SAMPLE_DATA_MISSING_PARENTAL_WGS, \ + VARIANT3_BOTH_SAMPLE_TYPES, VARIANT4_BOTH_SAMPLE_TYPES, VARIANT2_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY, \ + VARIANT1_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY, VARIANT3_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY, \ + VARIANT4_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY from hail_search.web_app import init_web_app, sync_to_async_hail_query from hail_search.queries.base import BaseHailTableQuery @@ -365,35 +368,35 @@ async def test_both_sample_types_search(self): MULTI_PROJECT_BOTH_SAMPLE_TYPE_VARIANTS, gene_counts=GENE_COUNTS, sample_data=MULTI_PROJECT_SAMPLE_TYPES_SAMPLE_DATA, ) - # Variant1 in family_2 is de novo in exome but maternally inherited in genome. - # Genome passes quality and inheritance, show genotypes for both sample types. - variant1_interval = ['1', 10438, 10440] + # Variant 1 is de novo in exome but maternally inherited in genome. + # Variant 2 is inherited in exome and de novo in genome. + # Variant 3 is inherited in both sample types. Variant 4 is de novo in both sample types. inheritance_mode = 'recessive' await self._assert_expected_search( - [VARIANT1_BOTH_SAMPLE_TYPES], sample_data=FAMILY_2_BOTH_SAMPLE_TYPE_SAMPLE_DATA, inheritance_mode=inheritance_mode, - **COMP_HET_ALL_PASS_FILTERS, intervals=[variant1_interval] + [VARIANT1_BOTH_SAMPLE_TYPES, VARIANT2_BOTH_SAMPLE_TYPES, [VARIANT3_BOTH_SAMPLE_TYPES, VARIANT4_BOTH_SAMPLE_TYPES]], + sample_data=FAMILY_2_BOTH_SAMPLE_TYPE_SAMPLE_DATA, inheritance_mode=inheritance_mode, + **COMP_HET_ALL_PASS_FILTERS ) - # Exome passes quality and inheritance, show genotypes for both sample types. inheritance_mode = 'de_novo' await self._assert_expected_search( - [VARIANT1_BOTH_SAMPLE_TYPES], sample_data=FAMILY_2_BOTH_SAMPLE_TYPE_SAMPLE_DATA, inheritance_mode=inheritance_mode, - intervals=[variant1_interval] + [VARIANT1_BOTH_SAMPLE_TYPES, VARIANT2_BOTH_SAMPLE_TYPES, VARIANT4_BOTH_SAMPLE_TYPES], + sample_data=FAMILY_2_BOTH_SAMPLE_TYPE_SAMPLE_DATA, inheritance_mode=inheritance_mode, + **COMP_HET_ALL_PASS_FILTERS ) - # Variant 2 in family_2 is inherited in exome and there is no parental data in genome. - # Genome and exome pass quality and inheritance, show genotypes for both sample types. - variant2_interval = ['1', 38724418, 38724420] + # Same variants, but genome data is proband-only. inheritance_mode = 'recessive' await self._assert_expected_search( - [VARIANT2_BOTH_SAMPLE_TYPES], sample_data=FAMILY_2_BOTH_SAMPLE_TYPE_SAMPLE_DATA_MISSING_PARENTAL_WGS, - inheritance_mode=inheritance_mode, **COMP_HET_ALL_PASS_FILTERS, intervals=[variant2_interval] + [VARIANT1_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY, VARIANT2_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY, + [VARIANT3_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY, VARIANT4_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY]], + sample_data=FAMILY_2_BOTH_SAMPLE_TYPE_SAMPLE_DATA_MISSING_PARENTAL_WGS, inheritance_mode=inheritance_mode, + **COMP_HET_ALL_PASS_FILTERS ) - # Genome passes quality and inheritance but exome fails inheritance (parental data shows variant is inherited). - # Variant is excluded from search results. inheritance_mode = 'de_novo' await self._assert_expected_search( - [], sample_data=FAMILY_2_BOTH_SAMPLE_TYPE_SAMPLE_DATA_MISSING_PARENTAL_WGS, - inheritance_mode=inheritance_mode, intervals=[variant2_interval] + [VARIANT1_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY, VARIANT4_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY], + sample_data=FAMILY_2_BOTH_SAMPLE_TYPE_SAMPLE_DATA_MISSING_PARENTAL_WGS, inheritance_mode=inheritance_mode, + **COMP_HET_ALL_PASS_FILTERS ) async def test_inheritance_filter(self): diff --git a/hail_search/test_utils.py b/hail_search/test_utils.py index 7116ca8308..722e7242a7 100644 --- a/hail_search/test_utils.py +++ b/hail_search/test_utils.py @@ -160,23 +160,29 @@ '_sort': [1000010439], 'CAID': 'CA16717152', } -VARIANT1_BOTH_SAMPLE_TYPES = deepcopy(VARIANT1) -genotypes = VARIANT1['genotypes'] -VARIANT1_BOTH_SAMPLE_TYPES['genotypes'] = { + +VARIANT1_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY = deepcopy(VARIANT1) +genotypes = VARIANT1_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY['genotypes'] +VARIANT1_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY['genotypes'] = { 'I000004_hg00731': [ genotypes['I000004_hg00731'], {**genotypes['I000004_hg00731'], 'numAlt': 2, 'sampleType': 'WGS'} ], - 'I000005_hg00732': [ - genotypes['I000005_hg00732'], - {**genotypes['I000005_hg00732'], 'gq': 99, 'numAlt': 1, 'sampleType': 'WGS'} - ], - 'I000006_hg00733': [ - genotypes['I000006_hg00733'], - {**genotypes['I000006_hg00733'], 'sampleType': 'WGS'} - ], + 'I000005_hg00732': [genotypes['I000005_hg00732']], + 'I000006_hg00733': [genotypes['I000006_hg00733']], } +VARIANT1_BOTH_SAMPLE_TYPES = deepcopy(VARIANT1_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY) +genotypes = VARIANT1_BOTH_SAMPLE_TYPES['genotypes'] +VARIANT1_BOTH_SAMPLE_TYPES['genotypes']['I000005_hg00732'] = [ + *genotypes['I000005_hg00732'], + {**genotypes['I000005_hg00732'][0], 'gq': 99, 'numAlt': 1, 'sampleType': 'WGS'} +] +VARIANT1_BOTH_SAMPLE_TYPES['genotypes']['I000006_hg00733'] = [ + *genotypes['I000006_hg00733'], + {**genotypes['I000006_hg00733'][0], 'sampleType': 'WGS'} +] + VARIANT2 = { 'variantId': '1-38724419-T-G', 'chrom': '1', @@ -280,9 +286,10 @@ '_sort': [1038724419], 'CAID': None, } -VARIANT2_BOTH_SAMPLE_TYPES = deepcopy(VARIANT2) -genotypes = VARIANT2_BOTH_SAMPLE_TYPES['genotypes'] -VARIANT2_BOTH_SAMPLE_TYPES['genotypes'] = { + +VARIANT2_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY = deepcopy(VARIANT2) +genotypes = VARIANT2_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY['genotypes'] +VARIANT2_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY['genotypes'] = { 'I000004_hg00731': [ genotypes['I000004_hg00731'], {**genotypes['I000004_hg00731'], 'sampleType': 'WGS'} @@ -291,6 +298,17 @@ 'I000006_hg00733': [genotypes['I000006_hg00733']], } +VARIANT2_BOTH_SAMPLE_TYPES = deepcopy(VARIANT2_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY) +genotypes = VARIANT2_BOTH_SAMPLE_TYPES['genotypes'] +VARIANT2_BOTH_SAMPLE_TYPES['genotypes']['I000005_hg00732'] = [ + *genotypes['I000005_hg00732'], + {**genotypes['I000005_hg00732'][0], 'numAlt': 0, 'sampleType': 'WGS'} +] +VARIANT2_BOTH_SAMPLE_TYPES['genotypes']['I000006_hg00733'] = [ + *genotypes['I000006_hg00733'], + {**genotypes['I000006_hg00733'][0], 'sampleType': 'WGS'} +] + VARIANT3 = { 'variantId': '1-91502721-G-A', 'chrom': '1', @@ -361,6 +379,29 @@ '_sort': [1091502721], 'CAID': 'CA10960369', } + +VARIANT3_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY = deepcopy(VARIANT3) +genotypes = VARIANT3_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY['genotypes'] +VARIANT3_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY['genotypes'] = { + 'I000004_hg00731': [ + genotypes['I000004_hg00731'], + {**genotypes['I000004_hg00731'], 'sampleType': 'WGS'} + ], + 'I000005_hg00732': [genotypes['I000005_hg00732']], + 'I000006_hg00733': [genotypes['I000006_hg00733']], +} + +VARIANT3_BOTH_SAMPLE_TYPES = deepcopy(VARIANT3_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY) +genotypes = VARIANT3_BOTH_SAMPLE_TYPES['genotypes'] +VARIANT3_BOTH_SAMPLE_TYPES['genotypes']['I000005_hg00732'] = [ + *genotypes['I000005_hg00732'], + {**genotypes['I000005_hg00732'][0], 'sampleType': 'WGS'} +] +VARIANT3_BOTH_SAMPLE_TYPES['genotypes']['I000006_hg00733'] = [ + *genotypes['I000006_hg00733'], + {**genotypes['I000006_hg00733'][0], 'sampleType': 'WGS'} +] + VARIANT4 = { 'variantId': '1-91511686-T-G', 'chrom': '1', @@ -434,6 +475,28 @@ 'CAID': 'CA341062623', } +VARIANT4_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY = deepcopy(VARIANT4) +genotypes = VARIANT4_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY['genotypes'] +VARIANT4_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY['genotypes'] = { + 'I000004_hg00731': [ + genotypes['I000004_hg00731'], + {**genotypes['I000004_hg00731'], 'sampleType': 'WGS'} + ], + 'I000005_hg00732': [genotypes['I000005_hg00732']], + 'I000006_hg00733': [genotypes['I000006_hg00733']], +} + +VARIANT4_BOTH_SAMPLE_TYPES = deepcopy(VARIANT4_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY) +genotypes = VARIANT4_BOTH_SAMPLE_TYPES['genotypes'] +VARIANT4_BOTH_SAMPLE_TYPES['genotypes']['I000005_hg00732'] = [ + *genotypes['I000005_hg00732'], + {**genotypes['I000005_hg00732'][0], 'sampleType': 'WGS'} +] +VARIANT4_BOTH_SAMPLE_TYPES['genotypes']['I000006_hg00733'] = [ + *genotypes['I000006_hg00733'], + {**genotypes['I000006_hg00733'][0], 'sampleType': 'WGS'} +] + VARIANT_LOOKUP_VARIANT = { **VARIANT1, 'familyGenotypes': { From 3206941373154cbce37c06115dfed6c7dba929e9 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Mon, 4 Nov 2024 14:34:09 -0500 Subject: [PATCH 13/18] oop --- .../families/WGS/F000002_2_old.ht/.README.txt.crc | Bin 12 -> 0 bytes .../families/WGS/F000002_2_old.ht/._SUCCESS.crc | Bin 8 -> 0 bytes .../WGS/F000002_2_old.ht/.metadata.json.gz.crc | Bin 12 -> 0 bytes .../families/WGS/F000002_2_old.ht/README.txt | 3 --- .../families/WGS/F000002_2_old.ht/_SUCCESS | 0 .../globals/.metadata.json.gz.crc | Bin 12 -> 0 bytes .../WGS/F000002_2_old.ht/globals/metadata.json.gz | Bin 289 -> 0 bytes .../F000002_2_old.ht/globals/parts/.part-0.crc | Bin 12 -> 0 bytes .../WGS/F000002_2_old.ht/globals/parts/part-0 | Bin 123 -> 0 bytes .../.index.crc | Bin 12 -> 0 bytes .../.metadata.json.gz.crc | Bin 12 -> 0 bytes .../index | Bin 111 -> 0 bytes .../metadata.json.gz | Bin 184 -> 0 bytes .../WGS/F000002_2_old.ht/metadata.json.gz | Bin 352 -> 0 bytes .../F000002_2_old.ht/rows/.metadata.json.gz.crc | Bin 16 -> 0 bytes .../WGS/F000002_2_old.ht/rows/metadata.json.gz | Bin 630 -> 0 bytes ...art-0-57b69294-ffdb-4b39-842a-adf898b62d31.crc | Bin 12 -> 0 bytes .../part-0-57b69294-ffdb-4b39-842a-adf898b62d31 | Bin 206 -> 0 bytes 18 files changed, 3 deletions(-) delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/.README.txt.crc delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/._SUCCESS.crc delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/.metadata.json.gz.crc delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/README.txt delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/_SUCCESS delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/globals/.metadata.json.gz.crc delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/globals/metadata.json.gz delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/globals/parts/.part-0.crc delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/globals/parts/part-0 delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/index/part-0-57b69294-ffdb-4b39-842a-adf898b62d31.idx/.index.crc delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/index/part-0-57b69294-ffdb-4b39-842a-adf898b62d31.idx/.metadata.json.gz.crc delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/index/part-0-57b69294-ffdb-4b39-842a-adf898b62d31.idx/index delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/index/part-0-57b69294-ffdb-4b39-842a-adf898b62d31.idx/metadata.json.gz delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/metadata.json.gz delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/rows/.metadata.json.gz.crc delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/rows/metadata.json.gz delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/rows/parts/.part-0-57b69294-ffdb-4b39-842a-adf898b62d31.crc delete mode 100644 hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/rows/parts/part-0-57b69294-ffdb-4b39-842a-adf898b62d31 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/.README.txt.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/.README.txt.crc deleted file mode 100644 index b774b2827b5f9698c8ceb8bcb104c1f48138e580..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}8v`{4o{)695B# diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/._SUCCESS.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/._SUCCESS.crc deleted file mode 100644 index 3b7b044936a890cd8d651d349a752d819d71d22c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 8 PcmYc;N@ieSU}69O2$TUk diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/.metadata.json.gz.crc deleted file mode 100644 index 5f2f901b97131b2aeb5048cac56077d681ae57cb..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}6YqU%2MHm|&VVcJw%Og9 z)TaO4843l#i#=`Mk7qxo1*-~z7+xHbBb>cGW)9N)kx1-}(nNX6f~$3Cy(+4lmIi%1O7ERh&76H>m1`DJU7-r}m$R=0CdLP~lTE(% zn_S|11nb6bj_8cSpq)Z{ZHHXd>ovKv>piE*&G;@kS`p|EljBJw$xFRu$gj{!#LRqi zU^iCVvVi(e-+ZsGBIpnVPt0aa5pe8H#RmhWOPk0w$j%xPAx3bO-U?CEKV&c z)-OpdE{RXfONmcO&CQPwGB7hWiZ944NX^O2OO4knOHR{EO0QxLcMoP@VeoAAn%ml- TGfh;57ibFzfNf%6WMBXQjJYDO diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/index/part-0-57b69294-ffdb-4b39-842a-adf898b62d31.idx/.index.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/index/part-0-57b69294-ffdb-4b39-842a-adf898b62d31.idx/.index.crc deleted file mode 100644 index fb65f202b7365f1ec2c4dee8517cc4c063161e7f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}Bi4v_ljC5w8Nf diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/index/part-0-57b69294-ffdb-4b39-842a-adf898b62d31.idx/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/index/part-0-57b69294-ffdb-4b39-842a-adf898b62d31.idx/.metadata.json.gz.crc deleted file mode 100644 index 4eb1dd4f7a5598fce8f9d708d58fd7ca59672cc9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}DHvTU`nO5|RTz diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/index/part-0-57b69294-ffdb-4b39-842a-adf898b62d31.idx/index b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/index/part-0-57b69294-ffdb-4b39-842a-adf898b62d31.idx/index deleted file mode 100644 index 51f5b8d2767b5254f9395fb4341922a95fa01caf..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 111 zcmeZZU|_HZVvVi(e--TSGBU6*Gcd5QBxe*E9uH(l1!ORkx$iTqJ2s8`DC;+X888!d_ diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/index/part-0-57b69294-ffdb-4b39-842a-adf898b62d31.idx/metadata.json.gz b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/index/part-0-57b69294-ffdb-4b39-842a-adf898b62d31.idx/metadata.json.gz deleted file mode 100644 index caa522e8089ac6d2d2dba6e427e15c1bc315afa2..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 184 zcmV;p07w5HiwFP!0000009B5`3c@fDME_+^3ON)lwdN**9uyQ6FXAD_Z4(TMBwH|) z{<~XmUY3QKc{4pU*5MUnvcZF_s-mm~Jfia2Sl@1xrVv4v#j+A$(6)v^&IG(LU-BD4 z!kJ>HsdqtlN$$rx*`C&BUQdN`4jm@Bic#eo8<}_?K!EZ-kRq=9hm4xvVNTpA!1^bC mj*1K$em|=*1UfWLLY}eUNs5^I=m?z1j^a?|IJC_CQ6pxq##cjLnrKY&Z&oF(rGRi%UozdqJ-s1`os082a0% zU^AIa2qgx#U_ho|69XdA-0cy*q=-1aLA@(AVCA!ZsdhqbxL_-1iqut>A2bqqX4%vj zUam3EY$ZGizTcg zr<*rR$q)5JmS3}|LslBnW~k>$cD%<_9sl3gz}Td@ak;}!n53#`&(I1MT`6iRx3Z33 y58~Qmlm_JC%5Vp*BIObe$bK_B^WN;Ub`M0*XCOYg6AWH}O@H3Zwh&sxAj>D%6aSIK zNw`b#oyIZ5kdg(4!h}Jn6N!H)E6n#>V52I4S)d_|d>IEcg_)AOP#yAeehUf{q4-*3 zTu`|;wn@qrPXr=&LfKYk9D5@$l#=cr%hZJt@mPUO{gVI)B{@`qX1~b~zbI@B1AlxQ zCsa9o@(_}i@*zV_wV9^W{xmefaW}qspq_&g}2UfoZux9WqF`;W2BZ1W5Y{8MKmC8l3UJnBWWswsX4dSE^XP z^ul#04Cr%rWu|{9f@`;RG~AU~ZaxX|163Quudcr4=^BlJM!Cga<-6vdM_i_A$=OR` zDx5FcyJe_47bE^VdNwnXC`v(9OsEivqQ}*Z9@%;S)lwQlT6WR`E;Vb#>V#8=%?B|c zewc#B-_bX3GgRJBf;exX+_6ji{$bhK>{BBa$h(`aXd@v^Bs)*)LLXKWXnF5cc~^H= z_2o`IC*?mvKB9SXXm31`@)^|AX;;aJF21Ew-Ew`<A8RonU(Xfuss6Xko=I0v`$wW7lvaO{7zLnE2*( z-}bWlqJjN~Bg_~Q+;Refy|RvH4ouT=opz}?=1lTRT!itCp}BKvs2wV7tea>g(`u+v QQuoTTe|WRzO(zKe0M|P#UH||9 diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/rows/parts/.part-0-57b69294-ffdb-4b39-842a-adf898b62d31.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/rows/parts/.part-0-57b69294-ffdb-4b39-842a-adf898b62d31.crc deleted file mode 100644 index 7b6b76d406cc2627893358ce61707360c0866abe..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}AVF>}&!65*Grl diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/rows/parts/part-0-57b69294-ffdb-4b39-842a-adf898b62d31 b/hail_search/fixtures/GRCh38/SNV_INDEL/families/WGS/F000002_2_old.ht/rows/parts/part-0-57b69294-ffdb-4b39-842a-adf898b62d31 deleted file mode 100644 index 57088cd43339b536d15c8ceb6a6793a3753cf5ca..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 206 zcmdnVz`*bWh&8tA|5f;r%F0l}!N|apoKa+WJdlaW(V5Yafti6pgu#Y|A(=sf=Yc(! z$l+tVg_sya7~M4#pV%vWuvcmPctM+q(LGsz;og>SLbvRtc^HywukkKBk$%@+mF0uI z22b1ei`tCRVS&LxAw`LK$r-7^egT%2dX}ao`Gy7tdIko}x-1M5EDTmPQi(g43m&tV z^CxC4V^N}rFMyuz Date: Mon, 4 Nov 2024 14:37:51 -0500 Subject: [PATCH 14/18] typo --- hail_search/definitions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hail_search/definitions.py b/hail_search/definitions.py index bf8baaec2c..0ee52d076a 100644 --- a/hail_search/definitions.py +++ b/hail_search/definitions.py @@ -16,7 +16,7 @@ def family_entries_field(self) -> str: def passes_inheritance_field(self) -> str: return { SampleType.WES: 'wes_passes_inheritance', - SampleType.WGS: 'wgs_passes_quality', + SampleType.WGS: 'wes_passes_inheritance', }[self] @property From 2d5d07f20f53857353b7b6b4a1a7e00e5947950b Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Mon, 4 Nov 2024 14:38:27 -0500 Subject: [PATCH 15/18] typo 2 --- hail_search/definitions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hail_search/definitions.py b/hail_search/definitions.py index 0ee52d076a..bc6aa5b204 100644 --- a/hail_search/definitions.py +++ b/hail_search/definitions.py @@ -16,7 +16,7 @@ def family_entries_field(self) -> str: def passes_inheritance_field(self) -> str: return { SampleType.WES: 'wes_passes_inheritance', - SampleType.WGS: 'wes_passes_inheritance', + SampleType.WGS: 'wgs_passes_inheritance', }[self] @property From f82c0fe72bb93169401607a16c74d0375e1456f4 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Tue, 5 Nov 2024 11:10:05 -0500 Subject: [PATCH 16/18] PR comments --- hail_search/test_search.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/hail_search/test_search.py b/hail_search/test_search.py index 3593caabd5..0b9c312ffa 100644 --- a/hail_search/test_search.py +++ b/hail_search/test_search.py @@ -368,7 +368,7 @@ async def test_both_sample_types_search(self): MULTI_PROJECT_BOTH_SAMPLE_TYPE_VARIANTS, gene_counts=GENE_COUNTS, sample_data=MULTI_PROJECT_SAMPLE_TYPES_SAMPLE_DATA, ) - # Variant 1 is de novo in exome but maternally inherited in genome. + # Variant 1 is de novo in exome but inherited and homozygous in genome. # Variant 2 is inherited in exome and de novo in genome. # Variant 3 is inherited in both sample types. Variant 4 is de novo in both sample types. inheritance_mode = 'recessive' @@ -377,14 +377,6 @@ async def test_both_sample_types_search(self): sample_data=FAMILY_2_BOTH_SAMPLE_TYPE_SAMPLE_DATA, inheritance_mode=inheritance_mode, **COMP_HET_ALL_PASS_FILTERS ) - inheritance_mode = 'de_novo' - await self._assert_expected_search( - [VARIANT1_BOTH_SAMPLE_TYPES, VARIANT2_BOTH_SAMPLE_TYPES, VARIANT4_BOTH_SAMPLE_TYPES], - sample_data=FAMILY_2_BOTH_SAMPLE_TYPE_SAMPLE_DATA, inheritance_mode=inheritance_mode, - **COMP_HET_ALL_PASS_FILTERS - ) - - # Same variants, but genome data is proband-only. inheritance_mode = 'recessive' await self._assert_expected_search( [VARIANT1_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY, VARIANT2_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY, @@ -392,11 +384,16 @@ async def test_both_sample_types_search(self): sample_data=FAMILY_2_BOTH_SAMPLE_TYPE_SAMPLE_DATA_MISSING_PARENTAL_WGS, inheritance_mode=inheritance_mode, **COMP_HET_ALL_PASS_FILTERS ) + + inheritance_mode = 'de_novo' + await self._assert_expected_search( + [VARIANT1_BOTH_SAMPLE_TYPES, VARIANT2_BOTH_SAMPLE_TYPES, VARIANT4_BOTH_SAMPLE_TYPES], + sample_data=FAMILY_2_BOTH_SAMPLE_TYPE_SAMPLE_DATA, inheritance_mode=inheritance_mode, + ) inheritance_mode = 'de_novo' await self._assert_expected_search( [VARIANT1_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY, VARIANT4_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY], sample_data=FAMILY_2_BOTH_SAMPLE_TYPE_SAMPLE_DATA_MISSING_PARENTAL_WGS, inheritance_mode=inheritance_mode, - **COMP_HET_ALL_PASS_FILTERS ) async def test_inheritance_filter(self): From 0158391da3fc1ba280f3016e53c9f6edaca86da6 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Thu, 7 Nov 2024 13:46:10 -0500 Subject: [PATCH 17/18] comments --- hail_search/test_search.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hail_search/test_search.py b/hail_search/test_search.py index 0b9c312ffa..3878318e27 100644 --- a/hail_search/test_search.py +++ b/hail_search/test_search.py @@ -369,7 +369,7 @@ async def test_both_sample_types_search(self): ) # Variant 1 is de novo in exome but inherited and homozygous in genome. - # Variant 2 is inherited in exome and de novo in genome. + # Variant 2 is inherited and homozygous in exome and de novo and homozygous in genome. # Variant 3 is inherited in both sample types. Variant 4 is de novo in both sample types. inheritance_mode = 'recessive' await self._assert_expected_search( @@ -390,6 +390,7 @@ async def test_both_sample_types_search(self): [VARIANT1_BOTH_SAMPLE_TYPES, VARIANT2_BOTH_SAMPLE_TYPES, VARIANT4_BOTH_SAMPLE_TYPES], sample_data=FAMILY_2_BOTH_SAMPLE_TYPE_SAMPLE_DATA, inheritance_mode=inheritance_mode, ) + # Variant 2 fails inheritance when parental data is missing in genome inheritance_mode = 'de_novo' await self._assert_expected_search( [VARIANT1_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY, VARIANT4_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY], From 0b542e17ba8e6cfa1b50870df0ba801bdf13ee67 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Thu, 7 Nov 2024 14:15:11 -0500 Subject: [PATCH 18/18] extra lines --- hail_search/test_search.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/hail_search/test_search.py b/hail_search/test_search.py index 3878318e27..8fc9caa68d 100644 --- a/hail_search/test_search.py +++ b/hail_search/test_search.py @@ -377,7 +377,6 @@ async def test_both_sample_types_search(self): sample_data=FAMILY_2_BOTH_SAMPLE_TYPE_SAMPLE_DATA, inheritance_mode=inheritance_mode, **COMP_HET_ALL_PASS_FILTERS ) - inheritance_mode = 'recessive' await self._assert_expected_search( [VARIANT1_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY, VARIANT2_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY, [VARIANT3_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY, VARIANT4_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY]], @@ -391,7 +390,6 @@ async def test_both_sample_types_search(self): sample_data=FAMILY_2_BOTH_SAMPLE_TYPE_SAMPLE_DATA, inheritance_mode=inheritance_mode, ) # Variant 2 fails inheritance when parental data is missing in genome - inheritance_mode = 'de_novo' await self._assert_expected_search( [VARIANT1_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY, VARIANT4_BOTH_SAMPLE_TYPES_PROBAND_WGS_ONLY], sample_data=FAMILY_2_BOTH_SAMPLE_TYPE_SAMPLE_DATA_MISSING_PARENTAL_WGS, inheritance_mode=inheritance_mode,