-
Notifications
You must be signed in to change notification settings - Fork 89
handle wes/wgs inheritance edge case #4440
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
babadd7
7115f81
fb1dc19
de664cd
8d9d25d
877abbc
fb88af9
773ba0c
18d1d63
4d58e03
e1edb07
d806332
3206941
ef5a4ec
2d5d07f
f82c0fe
0158391
0b542e1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,3 @@ | ||
This folder comprises a Hail (www.hail.is) native Table or MatrixTable. | ||
Written with version 0.2.130-bea04d9c79b5 | ||
Created at 2024/10/02 14:46:35 | ||
Written with version 0.2.132-678e1f52b999 | ||
Created at 2024/10/28 16:21:30 |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -209,34 +209,55 @@ def _filter_entries_ht_both_sample_types( | |
for sample_type, sorted_family_sample_data in sample_types: | ||
ht, ch_ht = self._filter_inheritance( | ||
ht, ch_ht, inheritance_filter, sorted_family_sample_data, | ||
annotation=sample_type.passes_inheritance_field, entries_ht_field=sample_type.family_entries_field | ||
annotate_func=self._annotate_failed_family_samples_inheritance, | ||
annotation=sample_type.failed_family_sample_field, entries_ht_field=sample_type.family_entries_field, | ||
) | ||
for family_idx, samples in enumerate(sorted_family_sample_data): | ||
family_guid = samples[0]['familyGuid'] | ||
family_guid_idx_map[family_guid][sample_type.value] = family_idx | ||
|
||
family_idx_map = hl.dict(family_guid_idx_map) | ||
ht = self._apply_multi_sample_type_entry_filters(ht, family_idx_map) | ||
ch_ht = self._apply_multi_sample_type_entry_filters(ch_ht, family_idx_map) | ||
family_guid_idx_map = hl.dict(family_guid_idx_map) | ||
ht = self._apply_multi_sample_type_entry_filters(ht, family_guid_idx_map) | ||
ch_ht = self._apply_multi_sample_type_entry_filters(ch_ht, family_guid_idx_map) | ||
return ht, ch_ht | ||
|
||
def _annotate_failed_family_samples_inheritance(
    self, ht, inheritance_mode, inheritance_filter, sorted_family_sample_data, annotation, entries_ht_field
):
    """Annotate each family with the IDs of samples that fail the inheritance genotype check.

    For every family in ``ht[entries_ht_field]`` the ``annotation`` field holds an
    array of ``sampleId`` strings, one per sample whose ``GT`` does not satisfy the
    genotype query required for it.

    :param ht: Hail table to annotate, or ``None`` (returned unchanged).
    :param inheritance_mode: forwarded to ``_get_entry_indices_by_gt_map``.
    :param inheritance_filter: forwarded to ``_get_entry_indices_by_gt_map``.
    :param sorted_family_sample_data: forwarded to ``_get_entry_indices_by_gt_map``.
    :param annotation: name of the row field that receives the failed sample IDs.
    :param entries_ht_field: name of the row field holding the per-family entries.
    :return: the annotated table, or ``None`` when ``ht`` is ``None``.
    """
    # NOTE(review): presumably maps genotype -> {family index -> [sample indices]};
    # confirm against _get_entry_indices_by_gt_map.
    entry_indices_by_gt = self._get_entry_indices_by_gt_map(
        inheritance_filter, inheritance_mode, sorted_family_sample_data
    )

    if ht is None:
        return ht

    # Start every family with an empty list of failed sample IDs
    ht = ht.annotate(**{annotation: ht[entries_ht_field].map(lambda x: hl.empty_array(hl.tstr))})

    # Append the samples failing each required genotype
    for genotype, entry_indices in entry_indices_by_gt.items():
        if not entry_indices:
            continue

        family_sample_indices = hl.dict(entry_indices)
        passes_genotype = self.GENOTYPE_QUERY_MAP[genotype]
        ht = ht.annotate(**{annotation: hl.enumerate(ht[entries_ht_field]).starmap(
            lambda family_idx, entries: hl.if_else(
                # A family with no samples constrained to this genotype is absent from
                # the dict; keep its accumulated list instead of letting the missing
                # lookup turn the whole annotation into a missing value.
                family_sample_indices.contains(family_idx),
                ht[annotation][family_idx].extend(
                    family_sample_indices.get(family_idx).filter(
                        lambda sample_idx: ~passes_genotype(entries[sample_idx].GT)
                    ).map(lambda sample_idx: entries[sample_idx]['sampleId'])
                ),
                ht[annotation][family_idx],
            )
        )})

    return ht
|
||
def _apply_multi_sample_type_entry_filters(self, ht, family_idx_map): | ||
if ht is None: | ||
return ht | ||
|
||
# Keep family from both sample types if either passes quality AND inheritance | ||
for sample_type in SampleType: | ||
ht = ht.annotate(**{ | ||
sample_type.family_entries_field: hl.enumerate(ht[sample_type.family_entries_field]).starmap( | ||
lambda i, family_samples: hl.or_missing( | ||
hl.bind( | ||
lambda other_sample_type_idx: ( | ||
self._family_has_valid_sample_type_entries(ht, sample_type, i) | | ||
self._family_has_valid_sample_type_entries(ht, sample_type.other_sample_type, other_sample_type_idx) | ||
), | ||
family_idx_map.get(hl.coalesce(family_samples)[0]['familyGuid']).get(sample_type.other_sample_type.value), | ||
), family_samples) | ||
)}) | ||
ht = self._apply_quality_entry_filters(ht, sample_type, family_idx_map) | ||
ht = self._apply_inheritance_entry_filters(ht, sample_type, family_idx_map) | ||
|
||
# Merge family entries and filters from both sample types | ||
ht = ht.transmute( | ||
|
@@ -252,15 +273,49 @@ def _apply_multi_sample_type_entry_filters(self, ht, family_idx_map): | |
# Filter out families with no valid entries in either sample type | ||
return ht.filter(ht.family_entries.any(hl.is_defined)) | ||
|
||
def _apply_quality_entry_filters(self, ht, sample_type, family_idx_map):
    """Set a family's entries to missing unless quality is valid in at least one sample type.

    A family's entries for ``sample_type`` are kept when ``_family_has_valid_quality``
    holds either for this sample type at the family's own index or for the other
    sample type at the index looked up in ``family_idx_map`` by family GUID.

    :param ht: Hail table holding the per-sample-type family entries.
    :param sample_type: sample type whose family entries field is rewritten.
    :param family_idx_map: Hail dict of family GUID -> {sample type value -> family index}.
    :return: ``ht`` with ``sample_type.family_entries_field`` re-annotated.
    """
    entries_field = sample_type.family_entries_field
    other_type = sample_type.other_sample_type

    def _keep_if_either_type_passes(family_idx, family_samples):
        # Index of this same family within the other sample type's entries
        other_family_idx = family_idx_map.get(
            hl.coalesce(family_samples)[0]['familyGuid']
        ).get(other_type.value)
        passes_quality = hl.bind(
            lambda other_idx: (
                self._family_has_valid_quality(ht, sample_type, family_idx) |
                self._family_has_valid_quality(ht, other_type, other_idx)
            ),
            other_family_idx,
        )
        return hl.or_missing(passes_quality, family_samples)

    filtered_entries = hl.enumerate(ht[entries_field]).starmap(_keep_if_either_type_passes)
    return ht.annotate(**{entries_field: filtered_entries})
|
||
@staticmethod | ||
def _family_has_valid_sample_type_entries(ht, sample_type, sample_type_family_idx): | ||
# Note: This logic does not sufficiently handle case 2 here https://docs.google.com/presentation/d/1hqDV8ulhviUcR5C4PtNUqkCLXKDsc6pccgFVlFmWUAU/edit?usp=sharing | ||
# and will need to be changed to support it - https://github.com/broadinstitute/seqr/issues/4403 | ||
def _family_has_valid_quality(ht, sample_type, sample_type_family_idx): | ||
return ( | ||
hl.is_defined(sample_type_family_idx) & | ||
hl.is_defined(ht[sample_type.passes_quality_field][sample_type_family_idx]) & | ||
hl.is_defined(ht[sample_type.passes_inheritance_field][sample_type_family_idx]) | ||
hl.is_defined(ht[sample_type.passes_quality_field][sample_type_family_idx]) | ||
) | ||
|
||
@staticmethod
def _apply_inheritance_entry_filters(ht, sample_type, family_idx_map):
    """Set a family's entries to missing unless inheritance failures are covered by the other sample type.

    A family is kept for ``sample_type`` only when both conditions hold:
      * every sample that failed inheritance in this sample type is among the
        samples that passed in the other sample type, and
      * every sample that failed in the other sample type is among the samples
        that passed in this sample type.

    :param ht: Hail table with family entries and failed-sample fields for both sample types.
    :param sample_type: sample type whose family entries field is rewritten.
    :param family_idx_map: Hail dict of family GUID -> {sample type value -> family index}.
    :return: ``ht`` with ``sample_type.family_entries_field`` re-annotated.
    """
    other_sample_type = sample_type.other_sample_type
    entries_field = sample_type.family_entries_field
    failed_field = sample_type.failed_family_sample_field
    other_entries_field = other_sample_type.family_entries_field
    other_failed_field = other_sample_type.failed_family_sample_field

    def _passing_sample_ids(family_entries, failed_sample_ids):
        # Sample IDs of the family's entries that are not listed as failed
        return family_entries.filter(
            lambda s: ~failed_sample_ids.contains(s['sampleId'])
        ).map(lambda s: s['sampleId'])

    def _family_passes(family_idx, family_samples, other_family_idx):
        failed_samples = ht[failed_field][family_idx]
        other_failed_samples = ht[other_failed_field][other_family_idx]
        return hl.bind(
            lambda other_sample_type_pass_samples, sample_type_pass_samples: (
                failed_samples.all(other_sample_type_pass_samples.contains) &
                other_failed_samples.all(sample_type_pass_samples.contains)
            ),
            _passing_sample_ids(ht[other_entries_field][other_family_idx], other_failed_samples),
            # Passing samples for this sample type must come from this sample type's
            # own entries; family_idx is not a valid index into the other sample
            # type's entries array.
            _passing_sample_ids(family_samples, failed_samples),
        )

    # NOTE(review): when the family has no index for the other sample type the
    # condition evaluates to missing and the entries become missing — confirm that
    # only families present in both sample types reach this method.
    return ht.annotate(**{entries_field: hl.enumerate(ht[entries_field]).starmap(
        lambda family_idx, family_samples: hl.or_missing(
            hl.bind(
                lambda other_sample_type_family_idx: _family_passes(
                    family_idx, family_samples, other_sample_type_family_idx
                ),
                family_idx_map.get(hl.coalesce(family_samples)[0]['familyGuid']).get(other_sample_type.value),
            ),
            family_samples,
        )
    )})
|
||
def _get_sample_genotype(self, samples, r=None, include_genotype_overrides=False, select_fields=None, **kwargs): | ||
if not self._has_both_sample_types: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -388,10 +388,11 @@ async def test_both_sample_types_search(self): | |
[VARIANT2_BOTH_SAMPLE_TYPES], sample_data=FAMILY_2_BOTH_SAMPLE_TYPE_SAMPLE_DATA_MISSING_PARENTAL_WGS, | ||
inheritance_mode=inheritance_mode, **COMP_HET_ALL_PASS_FILTERS, intervals=[variant2_interval] | ||
) | ||
# Genome passes quality and inheritance exome fails inheritance (parental data shows variant is inherited). | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I still think a comment here explaining what's being tested is helpful. Maybe something like "Variant 2 fails inheritance when parental data is present". There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Updated! |
||
# Genome passes quality and inheritance but exome fails inheritance (parental data shows variant is inherited). | ||
# Variant is excluded from search results. | ||
|
||
inheritance_mode = 'de_novo' | ||
|
||
await self._assert_expected_search( | ||
[VARIANT2_BOTH_SAMPLE_TYPES], sample_data=FAMILY_2_BOTH_SAMPLE_TYPE_SAMPLE_DATA_MISSING_PARENTAL_WGS, | ||
[], sample_data=FAMILY_2_BOTH_SAMPLE_TYPE_SAMPLE_DATA_MISSING_PARENTAL_WGS, | ||
inheritance_mode=inheritance_mode, intervals=[variant2_interval] | ||
) | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This feels incredibly similar to `_annotate_families_inheritance`. Rather than making a whole separate function for this, you could make a much more tightly scoped conditional helper to pass into `_annotate_families_inheritance`, perhaps just for the `lambda` function applied to the `hl.enumerate(ht[entries_ht_field]).starmap(` call.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I implemented this, but because I don't want the single-sample-type families that originally come through the mito class code path to call the mito `family_passes_inheritance_filter` function, I'm still passing a `family_passes_inheritance_filter` function to `_filter_inheritance` instead of using Python inheritance here. Do you know if there's a cleaner way to do this?