
Merge pull request #62 from johnlees/dist_progress
Improve distance calculation efficiency and progress tracking
johnlees authored Jun 18, 2021
2 parents 4bc635d + 70f97d0 commit 52664e1
Showing 4 changed files with 29 additions and 13 deletions.
2 changes: 1 addition & 1 deletion pp_sketch/__init__.py
@@ -3,4 +3,4 @@
 
 '''PopPUNK sketching functions'''
 
-__version__ = '1.7.2'
+__version__ = '1.7.3'
31 changes: 22 additions & 9 deletions src/api.cpp
@@ -20,6 +20,8 @@
 using namespace Eigen;
 namespace py = pybind11;
 
+const int progressBitshift = 10;
+
 bool same_db_version(const std::string &db1_name, const std::string &db2_name) {
   // Open databases
   Database db1(db1_name + ".h5");
@@ -177,9 +179,18 @@ NumpyMatrix query_db(std::vector<Reference> &ref_sketches,
 
   arma::mat kmer_mat = kmer2mat<std::vector<size_t>>(kmer_lengths);
 
+  // Set up progress meter
+  size_t progress_blocks = 1 << progressBitshift;
+  size_t update_every = dist_rows >> progressBitshift;
+  if (progress_blocks > dist_rows || update_every < 1) {
+    progress_blocks = dist_rows;
+    update_every = 1;
+  }
+  ProgressMeter dist_progress(progress_blocks, true);
+  int progress = 0;
+
   // Iterate upper triangle
-  ProgressMeter dist_progress(dist_rows, true);
-#pragma omp parallel for simd schedule(guided, 1) num_threads(num_threads)
+#pragma omp parallel for schedule(dynamic, 5) num_threads(num_threads) shared(progress)
   for (size_t i = 0; i < ref_sketches.size(); i++) {
     if (interrupt || PyErr_CheckSignals() != 0) {
       interrupt = true;
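
Note on the hunk above: the bit-shift arithmetic caps the meter at 2^progressBitshift = 1024 updates regardless of problem size, so progress reporting stays O(1024) rather than O(dist_rows). A minimal standalone sketch of that arithmetic, with an illustrative dist_rows value not taken from the PR:

#include <cstddef>
#include <cstdio>

int main() {
  const int progressBitshift = 10;            // as added in api.cpp above
  std::size_t dist_rows = 100000;             // illustrative: n*(n-1)/2 pairwise rows
  std::size_t progress_blocks = 1 << progressBitshift;      // 1024 meter updates
  std::size_t update_every = dist_rows >> progressBitshift; // 97 rows between ticks
  if (progress_blocks > dist_rows || update_every < 1) {
    // Tiny inputs: fall back to one tick per row
    progress_blocks = dist_rows;
    update_every = 1;
  }
  std::printf("blocks=%zu, update_every=%zu\n", progress_blocks, update_every);
  return 0;
}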
@@ -188,18 +199,20 @@
         size_t pos = square_to_condensed(i, j, ref_sketches.size());
         if (jaccard) {
           for (unsigned int kmer_idx = 0; kmer_idx < kmer_lengths.size();
-              kmer_idx++) {
+               kmer_idx++) {
             distMat(pos, kmer_idx) = ref_sketches[i].jaccard_dist(
                 ref_sketches[j], kmer_lengths[kmer_idx], random_chance);
           }
         } else {
           std::tie(distMat(pos, 0), distMat(pos, 1)) =
-              ref_sketches[i].core_acc_dist<RandomMC>(ref_sketches[j], kmer_mat,
-                                                      random_chance);
+              ref_sketches[i].core_acc_dist<RandomMC>(
+                  ref_sketches[j], kmer_mat, random_chance);
         }
+        if (pos % update_every == 0) {
+#pragma omp atomic
+          progress++;
+          dist_progress.tick(1);
+        }
       }
-      if (omp_get_thread_num() == 0) {
-        dist_progress.tick(ref_sketches.size() / 2);
-      }
     }
   }
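
square_to_condensed itself is not part of this diff; for reference, a standard condensed (upper-triangle, diagonal-free) index consistent with how `pos` is used above would look like the sketch below. The j > i convention and the function body are assumptions, not the library's own implementation:

#include <cassert>
#include <cstddef>

// Hypothetical re-derivation: rows 0..i-1 contribute (n-1) + (n-2) + ... +
// (n-i) entries, i.e. i*n - i*(i+1)/2, and (i, j) sits (j - i - 1) entries
// into row i.
std::size_t square_to_condensed_sketch(std::size_t i, std::size_t j,
                                       std::size_t n) {
  assert(i < j && j < n); // assumed convention
  return i * n - (i * (i + 1)) / 2 + (j - i - 1);
}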
@@ -232,7 +245,7 @@ NumpyMatrix query_db(std::vector<Reference> &ref_sketches,
       const long dist_row = q_idx * ref_sketches.size() + r_idx;
       if (jaccard) {
         for (unsigned int kmer_idx = 0; kmer_idx < kmer_lengths.size();
-            kmer_idx++) {
+             kmer_idx++) {
           double jaccard_random = random_chance.random_match(
               ref_sketches[r_idx], query_random_idxs[q_idx],
               query_lengths[q_idx], kmer_lengths[kmer_idx]);
1 change: 0 additions & 1 deletion src/gpu/dist.cu
@@ -422,7 +422,6 @@ void reportDistProgress(progress_atomics progress, long long dist_rows) {
 // Initialise device and return info on its memory
 std::tuple<size_t, size_t, size_t> initialise_device(const int device_id) {
   CUDA_CALL(cudaSetDevice(device_id));
-  CUDA_CALL(cudaDeviceReset());
 
   size_t mem_free = 0;
   size_t mem_total = 0;
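
Dropping cudaDeviceReset() is the likely efficiency win on the GPU path: resetting destroys the device's primary context, which must then be rebuilt from scratch on the next CUDA call. A hedged sketch of what initialise_device plausibly does after this change — only cudaSetDevice and the mem_free/mem_total query are visible in the diff, and the third tuple element is a guess:

#include <cuda_runtime.h>
#include <tuple>

std::tuple<size_t, size_t, size_t> initialise_device_sketch(const int device_id) {
  cudaSetDevice(device_id);              // CUDA_CALL error-checking macro omitted
  // No cudaDeviceReset(): keep the existing context alive between calls
  size_t mem_free = 0;
  size_t mem_total = 0;
  cudaMemGetInfo(&mem_free, &mem_total); // free/total device memory in bytes
  return std::make_tuple(mem_free, mem_total, static_cast<size_t>(device_id));
}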
8 changes: 6 additions & 2 deletions src/sketch/progress.hpp
@@ -7,8 +7,8 @@ class ProgressMeter {
     tick(0);
   }
 
-  void tick(size_t blocks) {
-    count_ += blocks;
+  void tick_count(size_t count) {
+    count_ = count;
     if (percent_) {
       double progress = count_ / static_cast<double>(total_);
       progress = progress > 1 ? 1 : progress;
@@ -19,6 +19,10 @@
     }
   }
 
+  void tick(size_t blocks) {
+    tick_count(count_ + blocks);
+  }
+
   void finalise() {
     if (percent_) {
       fprintf(stderr, "%cProgress (CPU): 100.0%%\n", 13);
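
Putting the two progress.hpp hunks together: tick() keeps its relative interface but now routes through the new absolute tick_count(), so a caller tracking its own shared counter (as api.cpp does with its atomic `progress`) can jump the meter directly. A self-contained paraphrase below — the constructor body, member layout, and the print line elided from the diff are reconstructed as assumptions:

#include <cstddef>
#include <cstdio>

class ProgressMeter {
public:
  // Constructor inferred from the visible "tick(0);" line; members assumed
  ProgressMeter(std::size_t total, bool percent)
      : total_(total), count_(0), percent_(percent) {
    tick(0);
  }

  // New in this PR: absolute update, jumping the meter straight to `count`
  void tick_count(std::size_t count) {
    count_ = count;
    if (percent_) {
      double progress = count_ / static_cast<double>(total_);
      progress = progress > 1 ? 1 : progress;
      // This print line is elided in the diff; format string is a guess
      std::fprintf(stderr, "%cProgress (CPU): %.1f%%", 13, progress * 100);
    }
  }

  // Relative update, now a thin wrapper over tick_count()
  void tick(std::size_t blocks) { tick_count(count_ + blocks); }

  void finalise() {
    if (percent_) {
      std::fprintf(stderr, "%cProgress (CPU): 100.0%%\n", 13);
    }
  }

private:
  std::size_t total_, count_;
  bool percent_;
};

int main() {
  ProgressMeter meter(1 << 10, true); // 1024 blocks, matching progressBitshift
  for (int block = 0; block < (1 << 10); ++block) {
    meter.tick(1); // one relative tick per completed work block
  }
  meter.finalise();
  return 0;
}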
