@@ -448,7 +448,8 @@ bool VCFOutputCaller::emit_variant(const PathPositionHandleGraph& graph, SnarlCa
448448 const Snarl& snarl, const vector<SnarlTraversal>& called_traversals,
449449 const vector<int >& genotype, int ref_trav_idx, const unique_ptr<SnarlCaller::CallInfo>& call_info,
450450 const string& ref_path_name, int ref_offset, bool genotype_snarls, int ploidy,
451- function<string(const vector<SnarlTraversal>&, const vector<int >&, int , int , int )> trav_to_string) {
451+ function<string(const vector<SnarlTraversal>&, const vector<int >&, int , int , int )> trav_to_string,
452+ const vector<pair<string, string>>& info_tags) {
452453
453454#ifdef debug
454455 cerr << " emitting variant for " << pb2json (snarl) << endl;
@@ -581,6 +582,11 @@ bool VCFOutputCaller::emit_variant(const PathPositionHandleGraph& graph, SnarlCa
581582 // add some support info
582583 snarl_caller.update_vcf_info (snarl, site_traversals, site_genotype, call_info, sample_name, out_variant);
583584
585+ // add the info
586+ for (const pair<string, string>& info_tag : info_tags) {
587+ out_variant.info [info_tag.first ].push_back (info_tag.second );
588+ }
589+
584590 // if genotype_snarls, then we only flatten up to the snarl endpoints
585591 // (this is when we are in genotyping mode and want consistent calls regardless of the sample)
586592 int64_t flatten_len_s = 0 ;
@@ -1827,13 +1833,24 @@ bool FlowCaller::call_snarl(const Snarl& managed_snarl) {
18271833
18281834 vector<SnarlTraversal> travs;
18291835 FlowTraversalFinder* flow_trav_finder = dynamic_cast <FlowTraversalFinder*>(&traversal_finder);
1836+ vector<pair<string, string>> extra_info_tags;
1837+ int64_t site_allele_count = -1 ;
18301838 if (flow_trav_finder != nullptr ) {
18311839 // find the max flow traversals using specialized interface that accepts avg heurstic toggle
18321840 pair<vector<SnarlTraversal>, vector<double >> weighted_travs = flow_trav_finder->find_weighted_traversals (snarl, greedy_avg_flow);
18331841 travs = std::move (weighted_travs.first );
18341842 } else {
1835- // find the traversals using the generic interface
1836- travs = traversal_finder.find_traversals (snarl);
1843+ // find the traversals with the gbwt. also count the haplotypes in the site and remember it
1844+ // for a VCF tag (GAN)
1845+ GBWTTraversalFinder* gbwt_trav_finder = dynamic_cast <GBWTTraversalFinder*>(&traversal_finder);
1846+ if (gbwt_trav_finder != nullptr ) {
1847+ pair<vector<SnarlTraversal>, vector<gbwt::size_type>> gbwt_path_travs = gbwt_trav_finder->find_path_traversals (snarl);
1848+ travs = std::move (gbwt_path_travs.first );
1849+ extra_info_tags.push_back (make_pair (" GAN" , std::to_string (gbwt_trav_finder->count_haplotypes (gbwt_path_travs.second ))));
1850+ } else {
1851+ // find the traversals using the generic interface
1852+ travs = traversal_finder.find_traversals (snarl);
1853+ }
18371854 }
18381855
18391856 if (travs.empty ()) {
@@ -1893,7 +1910,7 @@ bool FlowCaller::call_snarl(const Snarl& managed_snarl) {
18931910 bool added = true ;
18941911 if (!gaf_output) {
18951912 added = emit_variant (graph, snarl_caller, snarl, travs, trav_genotype, ref_trav_idx, trav_call_info, ref_path_name,
1896- ref_offsets[ref_path_name], genotype_snarls, ploidy);
1913+ ref_offsets[ref_path_name], genotype_snarls, ploidy, nullptr , extra_info_tags );
18971914 } else {
18981915 pair<string, int64_t > pos_info = get_ref_position (graph, snarl, ref_path_name, ref_offsets[ref_path_name]);
18991916 emit_gaf_variant (graph, print_snarl (snarl), travs, trav_genotype, ref_trav_idx, pos_info.first , pos_info.second , &support_finder);
@@ -1908,6 +1925,9 @@ bool FlowCaller::call_snarl(const Snarl& managed_snarl) {
19081925string FlowCaller::vcf_header (const PathHandleGraph& graph, const vector<string>& contigs,
19091926 const vector<size_t >& contig_length_overrides) const {
19101927 string header = VCFOutputCaller::vcf_header (graph, contigs, contig_length_overrides);
1928+ if (dynamic_cast <GBWTTraversalFinder*>(&this ->traversal_finder ) != nullptr ) {
1929+ header+= " ##INFO=<ID=GAN,Number=1,Type=Integer,Description=\" Number of haplotypes going through site in the graph\" >\n " ;
1930+ }
19111931 header += " ##FORMAT=<ID=GT,Number=1,Type=String,Description=\" Genotype\" >\n " ;
19121932 snarl_caller.update_vcf_header (header);
19131933 header += " ##FILTER=<ID=PASS,Description=\" All filters passed\" >\n " ;
0 commit comments