@@ -14,7 +14,7 @@ use rustc_hash::FxHashMap;
1414use std:: collections:: hash_map:: DefaultHasher ;
1515use std:: fs:: File ;
1616use std:: hash:: { Hash , Hasher } ;
17- use std:: io:: { self , BufRead , BufReader , BufWriter } ;
17+ use std:: io:: { self , BufRead , BufReader , BufWriter , Write } ;
1818
1919use std:: num:: NonZeroUsize ;
2020use std:: sync:: atomic:: { AtomicUsize , Ordering } ;
@@ -197,7 +197,7 @@ impl GfaMafFastaOpts {
197197 Option < UnifiedSequenceIndex > ,
198198 Option < ( u8 , u8 , u8 , u8 , u8 , u8 ) > ,
199199 ) > {
200- let needs_sequence_mandatory = matches ! ( output_format, "gfa" | "maf" | "fasta" ) ;
200+ let needs_sequence_mandatory = matches ! ( output_format, "gfa" | "maf" | "fasta" | "fasta+paf" ) ;
201201 let needs_sequence_optional = output_format == "paf" && original_sequence_coordinates;
202202 let needs_poa = matches ! ( output_format, "gfa" | "maf" ) ;
203203
@@ -523,11 +523,15 @@ enum Args {
523523 #[ clap( flatten) ]
524524 query : QueryOpts ,
525525
526- /// Output format: 'auto' ('bed' for -r, 'bedpe' for -b), 'bed', 'bedpe', 'paf', 'gfa' (v1.0), 'maf', or 'fasta' ('gfa', 'maf', and 'fasta' require --sequence-files or --sequence-list)
526+ /// Output format: 'auto' ('bed' for -r, 'bedpe' for -b), 'bed', 'bedpe', 'paf', 'gfa' (v1.0), 'maf', 'fasta', or 'fasta+paf ' ('gfa', 'maf', 'fasta', and 'fasta+paf ' require --sequence-files or --sequence-list)
527527 #[ arg( help_heading = "Output options" ) ]
528528 #[ clap( short = 'o' , long, value_parser, default_value = "auto" ) ]
529529 output_format : String ,
530530
531+ /// Destination file basename, or nothing for standard output
532+ #[ clap( short = 'O' , long, value_parser, default_value = None ) ]
533+ output_basename : Option < String > ,
534+
531535 #[ clap( flatten) ]
532536 gfa_maf_fasta : GfaMafFastaOpts ,
533537
@@ -801,13 +805,14 @@ fn main() -> io::Result<()> {
801805 paf,
802806 query,
803807 output_format,
808+ output_basename,
804809 gfa_maf_fasta,
805810 } => {
806811 initialize_threads_and_log ( & common) ;
807812
808813 validate_output_format (
809814 & output_format,
810- & [ "auto" , "bed" , "bedpe" , "paf" , "gfa" , "maf" , "fasta" ] ,
815+ & [ "auto" , "bed" , "bedpe" , "paf" , "gfa" , "maf" , "fasta" , "fasta+paf" ] ,
811816 ) ?;
812817
813818 let impg = initialize_impg ( & common, & paf) ?;
@@ -898,47 +903,54 @@ fn main() -> io::Result<()> {
898903 subset_filter. as_ref ( ) ,
899904 ) ?;
900905
906+ // TODO: Why is name an Option for all the output functions?
907+ let name_opt = Some ( name) ;
908+
901909 // Output results based on the resolved format
902910 match resolved_output_format {
903911 "bed" => {
904912 // BED format - include the first element
905913 output_results_bed (
906914 & impg,
907915 & mut results,
908- Some ( name) ,
916+ & mut find_output_stream ( & output_basename, "bed" ) ?,
917+ & name_opt,
909918 query. effective_merge_distance ( ) ,
910919 query. original_sequence_coordinates ,
911- ) ;
920+ ) ? ;
912921 }
913922 "bedpe" => {
914923 // Skip the first element (the input range) for BEDPE output
915924 results. remove ( 0 ) ;
916925 output_results_bedpe (
917926 & impg,
918927 & mut results,
919- Some ( name) ,
928+ & mut find_output_stream ( & output_basename, "bed" ) ?,
929+ & name_opt,
920930 query. effective_merge_distance ( ) ,
921931 query. original_sequence_coordinates ,
922- ) ;
932+ ) ? ;
923933 }
924934 "paf" => {
925935 // Skip the first element (the input range) for PAF output
926936 results. remove ( 0 ) ;
927937 output_results_paf (
928938 & impg,
929939 & mut results,
930- Some ( name) ,
940+ & mut find_output_stream ( & output_basename, "paf" ) ?,
941+ & name_opt,
931942 query. effective_merge_distance ( ) ,
932943 query. original_sequence_coordinates ,
933944 sequence_index. as_ref ( ) ,
934- ) ;
945+ ) ? ;
935946 }
936947 "gfa" => {
937948 output_results_gfa (
938949 & impg,
939950 & mut results,
951+ & mut find_output_stream ( & output_basename, "gfa" ) ?,
940952 sequence_index. as_ref ( ) . unwrap ( ) ,
941- Some ( name ) ,
953+ & name_opt ,
942954 query. effective_merge_distance ( ) ,
943955 scoring_params. unwrap ( ) ,
944956 ) ?;
@@ -947,8 +959,9 @@ fn main() -> io::Result<()> {
947959 output_results_maf (
948960 & impg,
949961 & mut results,
962+ & mut find_output_stream ( & output_basename, "maf" ) ?,
950963 sequence_index. as_ref ( ) . unwrap ( ) ,
951- Some ( name ) ,
964+ & name_opt ,
952965 query. effective_merge_distance ( ) ,
953966 scoring_params. unwrap ( ) ,
954967 ) ?;
@@ -957,12 +970,35 @@ fn main() -> io::Result<()> {
957970 output_results_fasta (
958971 & impg,
959972 & mut results,
973+ & mut find_output_stream ( & output_basename, "fa" ) ?,
960974 sequence_index. as_ref ( ) . unwrap ( ) ,
961- Some ( name ) ,
975+ & name_opt ,
962976 query. effective_merge_distance ( ) ,
963977 reverse_complement,
964978 ) ?;
965- }
979+ } ,
980+ "fasta+paf" => {
981+ output_results_fasta (
982+ & impg,
983+ & mut results,
984+ & mut find_output_stream ( & output_basename, "fa" ) ?,
985+ sequence_index. as_ref ( ) . unwrap ( ) ,
986+ & name_opt,
987+ query. effective_merge_distance ( ) ,
988+ reverse_complement,
989+ ) ?;
990+ // Skip the first element (the input range) for PAF output
991+ results. remove ( 0 ) ;
992+ output_results_paf (
993+ & impg,
994+ & mut results,
995+ & mut find_output_stream ( & output_basename, "paf" ) ?,
996+ & name_opt,
997+ query. effective_merge_distance ( ) ,
998+ query. original_sequence_coordinates ,
999+ sequence_index. as_ref ( ) ,
1000+ ) ?;
1001+ } ,
9661002 _ => {
9671003 return Err ( io:: Error :: new (
9681004 io:: ErrorKind :: InvalidInput ,
@@ -1158,7 +1194,7 @@ fn main() -> io::Result<()> {
11581194fn validate_selection_mode ( mode : & str ) -> io:: Result < ( ) > {
11591195 match mode {
11601196 "longest" | "total" => Ok ( ( ) ) ,
1161- mode if mode == "sample" || mode == "haplotype"
1197+ mode if mode == "sample" || mode == "haplotype"
11621198 || mode. starts_with ( "sample," ) || mode. starts_with ( "haplotype," ) => Ok ( ( ) ) ,
11631199 _ => Err ( io:: Error :: new (
11641200 io:: ErrorKind :: InvalidInput ,
@@ -1383,6 +1419,20 @@ fn get_auto_reader(path: &str) -> io::Result<Box<dyn BufRead>> {
13831419 Ok ( Box :: new ( BufReader :: new ( reader) ) )
13841420}
13851421
1422+ /// Helper function to return a Write implementer that is either standard output or a file with the
1423+ /// appropriate basename and extension. When no basename is provided, uses standard output.
1424+ fn find_output_stream ( basename : & Option < String > , extension : & str ) -> io:: Result < Box < dyn Write > > {
1425+ // Anthropic's Claude came up with this.
1426+ match basename {
1427+ Some ( name) => {
1428+ let filename = format ! ( "{}.{}" , name, extension) ;
1429+ let file = File :: create ( filename) ?;
1430+ Ok ( Box :: new ( file) )
1431+ }
1432+ None => Ok ( Box :: new ( io:: stdout ( ) ) ) ,
1433+ }
1434+ }
1435+
13861436/// Load/generate index based on common and PAF options
13871437fn initialize_impg ( common : & CommonOpts , paf : & PafOpts ) -> io:: Result < Impg > {
13881438 // Resolve the list of PAF files
@@ -1778,10 +1828,11 @@ fn apply_subset_filter_if_provided(
17781828fn output_results_bed (
17791829 impg : & Impg ,
17801830 results : & mut Vec < AdjustedInterval > ,
1781- name : Option < String > ,
1831+ out : & mut dyn Write ,
1832+ name : & Option < String > ,
17821833 merge_distance : i32 ,
17831834 original_coordinates : bool ,
1784- ) {
1835+ ) -> io :: Result < ( ) > {
17851836 merge_query_adjusted_intervals ( results, merge_distance, false ) ;
17861837
17871838 for ( query_interval, _, _) in results {
@@ -1801,24 +1852,28 @@ fn output_results_bed(
18011852 original_coordinates,
18021853 ) ;
18031854
1804- println ! (
1855+ writeln ! (
1856+ out,
18051857 "{}\t {}\t {}\t {}\t .\t {}" ,
18061858 transformed_name,
18071859 transformed_first,
18081860 transformed_last,
18091861 name. as_deref( ) . unwrap_or( "." ) ,
18101862 strand
1811- ) ;
1863+ ) ? ;
18121864 }
1865+
1866+ Ok ( ( ) )
18131867}
18141868
18151869fn output_results_bedpe (
18161870 impg : & Impg ,
18171871 results : & mut Vec < AdjustedInterval > ,
1818- name : Option < String > ,
1872+ out : & mut dyn Write ,
1873+ name : & Option < String > ,
18191874 merge_distance : i32 ,
18201875 original_coordinates : bool ,
1821- ) {
1876+ ) -> io :: Result < ( ) > {
18221877 merge_adjusted_intervals ( results, merge_distance) ;
18231878
18241879 for ( overlap_query, cigar, overlap_target) in results {
@@ -1878,7 +1933,8 @@ fn output_results_bedpe(
18781933 . trim_end_matches ( '.' )
18791934 . to_string ( ) ;
18801935
1881- println ! (
1936+ writeln ! (
1937+ out,
18821938 "{}\t {}\t {}\t {}\t {}\t {}\t {}\t 0\t {}\t +\t gi:f:{}\t bi:f:{}" ,
18831939 transformed_query_name,
18841940 transformed_first,
@@ -1890,18 +1946,21 @@ fn output_results_bedpe(
18901946 strand,
18911947 gi_str,
18921948 bi_str
1893- ) ;
1949+ ) ? ;
18941950 }
1951+
1952+ Ok ( ( ) )
18951953}
18961954
18971955fn output_results_paf (
18981956 impg : & Impg ,
18991957 results : & mut Vec < AdjustedInterval > ,
1900- name : Option < String > ,
1958+ out : & mut dyn Write ,
1959+ name : & Option < String > ,
19011960 merge_distance : i32 ,
19021961 original_coordinates : bool ,
19031962 sequence_index : Option < & UnifiedSequenceIndex > ,
1904- ) {
1963+ ) -> io :: Result < ( ) > {
19051964 merge_adjusted_intervals ( results, merge_distance) ;
19061965
19071966 for ( overlap_query, cigar, overlap_target) in results {
@@ -1983,23 +2042,28 @@ fn output_results_paf(
19832042 . collect ( ) ;
19842043
19852044 match name {
1986- Some ( ref name) => println ! ( "{}\t {}\t {}\t {}\t {}\t {}\t {}\t {}\t {}\t {}\t {}\t {}\t gi:f:{}\t bi:f:{}\t cg:Z:{}\t an:Z:{}" ,
2045+ Some ( ref name) => writeln ! ( out,
2046+ "{}\t {}\t {}\t {}\t {}\t {}\t {}\t {}\t {}\t {}\t {}\t {}\t gi:f:{}\t bi:f:{}\t cg:Z:{}\t an:Z:{}" ,
19872047 transformed_query_name, query_length, transformed_first, transformed_last, strand,
19882048 transformed_target_name, target_length, transformed_target_first, transformed_target_last,
1989- matches, block_len, 255 , gi_str, bi_str, cigar_str, name) ,
1990- None => println ! ( "{}\t {}\t {}\t {}\t {}\t {}\t {}\t {}\t {}\t {}\t {}\t {}\t gi:f:{}\t bi:f:{}\t cg:Z:{}" ,
2049+ matches, block_len, 255 , gi_str, bi_str, cigar_str, name) ?,
2050+ None => writeln ! ( out,
2051+ "{}\t {}\t {}\t {}\t {}\t {}\t {}\t {}\t {}\t {}\t {}\t {}\t gi:f:{}\t bi:f:{}\t cg:Z:{}" ,
19912052 transformed_query_name, query_length, transformed_first, transformed_last, strand,
19922053 transformed_target_name, target_length, transformed_target_first, transformed_target_last,
1993- matches, block_len, 255 , gi_str, bi_str, cigar_str) ,
2054+ matches, block_len, 255 , gi_str, bi_str, cigar_str) ? ,
19942055 }
19952056 }
2057+
2058+ Ok ( ( ) )
19962059}
19972060
19982061fn output_results_gfa (
19992062 impg : & Impg ,
20002063 results : & mut Vec < AdjustedInterval > ,
2064+ out : & mut dyn Write ,
20012065 sequence_index : & UnifiedSequenceIndex ,
2002- _name : Option < String > ,
2066+ _name : & Option < String > ,
20032067 merge_distance : i32 ,
20042068 scoring_params : ( u8 , u8 , u8 , u8 , u8 , u8 ) ,
20052069) -> io:: Result < ( ) > {
@@ -2017,16 +2081,17 @@ fn output_results_gfa(
20172081 sequence_index,
20182082 scoring_params,
20192083 ) ;
2020- print ! ( "{gfa_output}" ) ;
2084+ writeln ! ( out , "{gfa_output}" ) ? ;
20212085
20222086 Ok ( ( ) )
20232087}
20242088
20252089fn output_results_fasta (
20262090 impg : & Impg ,
20272091 results : & mut Vec < AdjustedInterval > ,
2092+ out : & mut dyn Write ,
20282093 sequence_index : & UnifiedSequenceIndex ,
2029- _name : Option < String > ,
2094+ _name : & Option < String > ,
20302095 merge_distance : i32 ,
20312096 reverse_complement : bool ,
20322097) -> io:: Result < ( ) > {
@@ -2079,8 +2144,8 @@ fn output_results_fasta(
20792144
20802145 // Output sequences sequentially to maintain order
20812146 for ( header, sequence) in sequence_data {
2082- println ! ( "{header}" ) ;
2083- println ! ( "{sequence}" ) ;
2147+ writeln ! ( out , "{header}" ) ? ;
2148+ writeln ! ( out , "{sequence}" ) ? ;
20842149 }
20852150
20862151 Ok ( ( ) )
@@ -2089,8 +2154,9 @@ fn output_results_fasta(
20892154fn output_results_maf (
20902155 impg : & Impg ,
20912156 results : & mut Vec < AdjustedInterval > ,
2157+ out : & mut dyn Write ,
20922158 sequence_index : & UnifiedSequenceIndex ,
2093- _name : Option < String > ,
2159+ _name : & Option < String > ,
20942160 merge_distance : i32 ,
20952161 scoring_params : ( u8 , u8 , u8 , u8 , u8 , u8 ) ,
20962162) -> io:: Result < ( ) > {
@@ -2108,7 +2174,7 @@ fn output_results_maf(
21082174 sequence_index,
21092175 scoring_params,
21102176 ) ;
2111- print ! ( "{maf_output}" ) ;
2177+ writeln ! ( out , "{maf_output}" ) ? ;
21122178
21132179 Ok ( ( ) )
21142180}
0 commit comments