Skip to content

Commit be6a020

Browse files
Merge pull request #605 from pangenome/similarity_all
`odgi similarity`: Add option to emit all pairs of paths/groups in similarity computation
2 parents 60afc5d + 4896048 commit be6a020

File tree

1 file changed

+38
-1
lines changed

1 file changed

+38
-1
lines changed

src/subcommand/similarity_main.cpp

Lines changed: 38 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,9 @@ int main_similarity(int argc, char** argv) {
4141
{'p', "delim-pos"});
4242
args::Flag distances(path_investigation_opts, "distances", "Provide distances (dissimilarities) instead of similarities. "
4343
"Outputs additional columns with the Euclidean and Manhattan distances." , {'d', "distances"});
44-
args::Group threading_opts(parser, "[ Threading ]");
44+
args::Flag all_pairs(path_investigation_opts, "all", "Emit entries for all pairs of paths/groups, including those with zero intersection.", {'a', "all"});
45+
46+
args::Group threading_opts(parser, "[ Threading ]");
4547
args::ValueFlag<uint64_t> threads(threading_opts, "N", "Number of threads to use for parallel operations.", {'t', "threads"});
4648
args::Group processing_info_opts(parser, "[ Processing Information ]");
4749
args::Flag progress(processing_info_opts, "progress", "Write the current progress to stderr.", {'P', "progress"});
@@ -235,6 +237,41 @@ args::Group threading_opts(parser, "[ Threading ]");
235237
}
236238

237239
ska::flat_hash_map<uint64_t, uint64_t> path_intersection_length;
240+
241+
const bool emit_all_pairs = args::get(all_pairs);
242+
if (emit_all_pairs) {
243+
if (show_progress) {
244+
std::cerr << "[odgi::similarity] Pre-populating pair map for --all output..." << std::endl;
245+
}
246+
if (using_delim) {
247+
const uint32_t num_groups = path_groups.size();
248+
for (uint32_t i = 0; i < num_groups; ++i) {
249+
for (uint32_t j = 0; j < num_groups; ++j) {
250+
// Initialize with 0 intersection. Will be updated later if intersection > 0.
251+
path_intersection_length[encode_pair(i, j)] = 0;
252+
}
253+
}
254+
} else {
255+
std::vector<uint32_t> actual_path_ids; // Stores individual path integer IDs if not grouping
256+
// If not grouping, collect the actual integer path handles used
257+
actual_path_ids.reserve(graph.get_path_count());
258+
graph.for_each_path_handle([&](const path_handle_t& p) {
259+
actual_path_ids.push_back((uint32_t)as_integer(p));
260+
});
261+
262+
// Iterate through all actual path integer IDs collected earlier
263+
for (const uint32_t id_i : actual_path_ids) {
264+
for (const uint32_t id_j : actual_path_ids) {
265+
// Initialize with 0 intersection.
266+
path_intersection_length[encode_pair(id_i, id_j)] = 0;
267+
}
268+
}
269+
}
270+
if (show_progress) {
271+
std::cerr << "[odgi::similarity] Pre-population complete. Map size: " << path_intersection_length.size() << std::endl;
272+
}
273+
}
274+
238275
graph.for_each_handle(
239276
[&](const handle_t& h) {
240277
// Skip masked-out nodes

0 commit comments

Comments
 (0)