44#include " split.hpp"
55#include < omp.h>
66#include " utils.hpp"
7+ #include < sstream>
8+ #include < iomanip>
79
810namespace odgi {
911
@@ -371,6 +373,13 @@ int main_similarity(int argc, char** argv) {
371373 }
372374
373375 std::cout << std::endl;
376+
377+ // Use chunked buffering to balance speed and memory usage
378+ std::ostringstream output_buffer;
379+ output_buffer << std::fixed << std::setprecision (8 );
380+ const size_t buffer_chunk_size = 100000 ; // Lines per chunk
381+ size_t lines_written = 0 ;
382+
374383 for (auto & p : path_intersection_length) {
375384 uint32_t id_a, id_b;
376385 decode_pair (p.first , &id_a, &id_b);
@@ -383,28 +392,40 @@ int main_similarity(int argc, char** argv) {
383392 const double dice = 2.0 * ((double ) intersection / (double )(bp_count[id_a] + bp_count[id_b]));
384393 const double estimated_identity = 2.0 * jaccard / (1.0 + jaccard);
385394
386- std::cout << get_path_name (id_a) << " \t "
387- << get_path_name (id_b) << " \t "
388- << bp_count[id_a] << " \t "
389- << bp_count[id_b] << " \t "
390- << intersection << " \t " ;
395+ output_buffer << get_path_name (id_a) << " \t "
396+ << get_path_name (id_b) << " \t "
397+ << bp_count[id_a] << " \t "
398+ << bp_count[id_b] << " \t "
399+ << intersection << " \t " ;
391400
392401 if (emit_distances) {
393402 const double euclidian_distance = std::sqrt ((double )((bp_count[id_a] + bp_count[id_b] - intersection) - intersection));
394403 const uint64_t manhattan_distance = (bp_count[id_a] + bp_count[id_b] - intersection) - intersection;
395- std::cout << (1.0 - jaccard) << " \t "
396- << (1.0 - cosine) << " \t "
397- << (1.0 - dice) << " \t "
398- << (1.0 - estimated_identity) << " \t "
399- << euclidian_distance << " \t "
400- << manhattan_distance << std::endl ;
404+ output_buffer << (1.0 - jaccard) << " \t "
405+ << (1.0 - cosine) << " \t "
406+ << (1.0 - dice) << " \t "
407+ << (1.0 - estimated_identity) << " \t "
408+ << euclidian_distance << " \t "
409+ << manhattan_distance << " \n " ;
401410 } else {
402- std::cout << jaccard << " \t "
403- << cosine << " \t "
404- << dice << " \t "
405- << estimated_identity << std::endl;
411+ output_buffer << jaccard << " \t "
412+ << cosine << " \t "
413+ << dice << " \t "
414+ << estimated_identity << " \n " ;
415+ }
416+
417+ // Flush buffer every chunk_size lines
418+ if (++lines_written % buffer_chunk_size == 0 ) {
419+ std::cout << output_buffer.str ();
420+ output_buffer.str (" " );
421+ output_buffer.clear ();
406422 }
407423 }
424+
425+ // Write remaining buffer
426+ if (!output_buffer.str ().empty ()) {
427+ std::cout << output_buffer.str ();
428+ }
408429
409430 return 0 ;
410431}
0 commit comments