-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #64 from johnlees/read_blocks
Use a buffer to hold read data on the GPU
- Loading branch information
Showing
22 changed files
with
816 additions
and
882 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,4 +3,4 @@ | |
|
||
'''PopPUNK sketching functions''' | ||
|
||
__version__ = '1.7.3' | ||
__version__ = '1.7.4' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
|
||
#include "device_memory.cuh" | ||
#include "cuda.cuh" | ||
#include "gpu.hpp" | ||
|
||
// Sets up data structures and loads them onto the device | ||
// Sets up data structures and loads them onto the device.
//
// Work done, in order:
//   1. flatten the reference sketches in
//      [ref_offset, ref_offset + ref_size) and copy them to the device;
//   2. copy the (already flattened) random match table, and the slice of
//      ref_random_idx that belongs to these references;
//   3. if `self` is false, do the same for the query slice; otherwise the
//      query pointers stay null and `query_strides` is overwritten with
//      `ref_strides`;
//   4. copy the k-mer lengths (narrowed to int) and allocate a zeroed
//      distance buffer of 2 * dist_rows floats.
//
// All device pointers start as nullptr. If any step fails by throwing
// (NOTE(review): assumes CUDA_CALL throws on error -- its definition is not
// in this file), everything allocated so far is freed before the exception
// is re-thrown, because the destructor does not run for an object whose
// constructor did not complete.
DeviceMemory::DeviceMemory(
    SketchStrides &ref_strides, SketchStrides &query_strides,
    std::vector<Reference> &ref_sketches,
    std::vector<Reference> &query_sketches, const SketchSlice &sample_slice,
    const FlatRandom &flat_random, const std::vector<uint16_t> &ref_random_idx,
    const std::vector<uint16_t> &query_random_idx,
    const std::vector<size_t> &kmer_lengths, long long dist_rows,
    const bool self, const int cpu_threads)
    : _n_dists(dist_rows * 2), d_ref_sketches(nullptr),
      d_query_sketches(nullptr), d_random_table(nullptr),
      d_ref_random(nullptr), d_query_random(nullptr), d_kmers(nullptr),
      d_dist_mat(nullptr) {
  try {
    // Set up reference sketches, flatten and copy to device
    std::vector<uint64_t> flat_ref = flatten_by_samples(
        ref_sketches, kmer_lengths, ref_strides, sample_slice.ref_offset,
        sample_slice.ref_offset + sample_slice.ref_size, cpu_threads);
    CUDA_CALL(cudaMalloc((void **)&d_ref_sketches,
                         flat_ref.size() * sizeof(uint64_t)));
    CUDA_CALL(cudaMemcpy(d_ref_sketches, flat_ref.data(),
                         flat_ref.size() * sizeof(uint64_t),
                         cudaMemcpyDefault));

    // Preload random match chances, which have already been flattened
    const std::vector<float> &random_table = std::get<1>(flat_random);
    CUDA_CALL(cudaMalloc((void **)&d_random_table,
                         random_table.size() * sizeof(float)));
    CUDA_CALL(cudaMemcpy(d_random_table, random_table.data(),
                         random_table.size() * sizeof(float),
                         cudaMemcpyDefault));
    CUDA_CALL(cudaMalloc((void **)&d_ref_random,
                         sample_slice.ref_size * sizeof(uint16_t)));
    CUDA_CALL(cudaMemcpy(d_ref_random,
                         ref_random_idx.data() + sample_slice.ref_offset,
                         sample_slice.ref_size * sizeof(uint16_t),
                         cudaMemcpyDefault));

    // If ref v query mode, also flatten query vector and copy to device
    if (!self) {
      std::vector<uint64_t> flat_query = flatten_by_bins(
          query_sketches, kmer_lengths, query_strides,
          sample_slice.query_offset,
          sample_slice.query_offset + sample_slice.query_size, cpu_threads);
      CUDA_CALL(cudaMalloc((void **)&d_query_sketches,
                           flat_query.size() * sizeof(uint64_t)));
      CUDA_CALL(cudaMemcpy(d_query_sketches, flat_query.data(),
                           flat_query.size() * sizeof(uint64_t),
                           cudaMemcpyDefault));

      CUDA_CALL(cudaMalloc((void **)&d_query_random,
                           sample_slice.query_size * sizeof(uint16_t)));
      CUDA_CALL(cudaMemcpy(d_query_random,
                           query_random_idx.data() + sample_slice.query_offset,
                           sample_slice.query_size * sizeof(uint16_t),
                           cudaMemcpyDefault));
    } else {
      // Self mode: no separate query data, so the caller's query strides are
      // made identical to the reference strides
      query_strides = ref_strides;
    }

    // Copy or set other arrays needed on device (kmers and distance output)
    std::vector<int> kmer_ints(kmer_lengths.begin(), kmer_lengths.end());
    CUDA_CALL(cudaMalloc((void **)&d_kmers, kmer_ints.size() * sizeof(int)));
    CUDA_CALL(cudaMemcpy(d_kmers, kmer_ints.data(),
                         kmer_ints.size() * sizeof(int), cudaMemcpyDefault));

    CUDA_CALL(cudaMalloc((void **)&d_dist_mat, _n_dists * sizeof(float)));
    CUDA_CALL(cudaMemset(d_dist_mat, 0, _n_dists * sizeof(float)));
  } catch (...) {
    // Best-effort cleanup: cudaFree(nullptr) is a no-op, and any error from
    // these calls is deliberately ignored so the original exception survives
    (void)cudaFree(d_ref_sketches);
    (void)cudaFree(d_query_sketches);
    (void)cudaFree(d_random_table);
    (void)cudaFree(d_ref_random);
    (void)cudaFree(d_query_random);
    (void)cudaFree(d_kmers);
    (void)cudaFree(d_dist_mat);
    throw;
  }
}
|
||
// Frees every device allocation held by this object.
//
// The non-throwing check macro is used (as in DeviceReads::~DeviceReads):
// destructors are implicitly noexcept, so an exception escaping from here
// would end the program via std::terminate. d_query_sketches and
// d_query_random may still be nullptr (self mode); cudaFree accepts that.
DeviceMemory::~DeviceMemory() {
  CUDA_CALL_NOTHROW(cudaFree(d_ref_sketches));
  CUDA_CALL_NOTHROW(cudaFree(d_query_sketches));
  CUDA_CALL_NOTHROW(cudaFree(d_random_table));
  CUDA_CALL_NOTHROW(cudaFree(d_ref_random));
  CUDA_CALL_NOTHROW(cudaFree(d_query_random));
  CUDA_CALL_NOTHROW(cudaFree(d_kmers));
  CUDA_CALL_NOTHROW(cudaFree(d_dist_mat));
}
|
||
// Copies the whole distance buffer (_n_dists floats) from d_dist_mat into a
// freshly allocated host vector and returns it. Uses the blocking
// cudaMemcpy, so the vector is filled when this returns.
std::vector<float> DeviceMemory::read_dists() {
  const size_t n_bytes = _n_dists * sizeof(float);
  std::vector<float> host_dists(_n_dists);
  CUDA_CALL(
      cudaMemcpy(host_dists.data(), d_dist_mat, n_bytes, cudaMemcpyDefault));
  return host_dists;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
#pragma once | ||
|
||
#include "reference.hpp" | ||
|
||
// Align structs | ||
// https://stackoverflow.com/a/12779757 | ||
// ALIGN(n) expands to the compiler-specific spelling of "align this type to
// n bytes". Checked in this order: NVCC, GCC-compatible, MSVC. Any other
// compiler stops the build with an explicit error.
#if defined(__CUDACC__) // NVCC
#define ALIGN(n) __align__(n)
#elif defined(__GNUC__) // GCC
#define ALIGN(n) __attribute__((aligned(n)))
#elif defined(_MSC_VER) // MSVC
#define ALIGN(n) __declspec(align(n))
#else
#error "Please provide a definition for the ALIGN macro for your host compiler!"
#endif
|
||
// Three strides that travel with the flattened random-match table (first
// element of FlatRandom). 8-byte aligned.
// NOTE(review): the field names suggest strides over k-mer and cluster
// dimensions of that table; units (elements vs bytes) are not visible here --
// confirm against the code that builds FlatRandom.
struct ALIGN(8) RandomStrides {
  size_t kmer_stride;
  size_t cluster_inner_stride;
  size_t cluster_outer_stride;
};
|
||
// Flattened random-match data: element 0 holds the strides, element 1 the
// float table itself (element 1 is what DeviceMemory copies to the device).
typedef std::tuple<RandomStrides, std::vector<float>> FlatRandom;
|
||
#ifdef GPU_AVAILABLE | ||
|
||
// Structure of flattened vectors | ||
// Layout description passed alongside a flattened sketch vector. 16-byte
// aligned.
// NOTE(review): the three *_stride fields presumably give the step between
// consecutive bins / k-mers / samples in the flat uint64_t array, and
// sketchsize64 / bbits presumably describe the sketch dimensions -- none of
// this is established by the code visible here; confirm against
// flatten_by_samples / flatten_by_bins.
struct ALIGN(16) SketchStrides {
  size_t bin_stride;
  size_t kmer_stride;
  size_t sample_stride;
  size_t sketchsize64;
  size_t bbits;
};
|
||
// Selects the part of the reference and query sample lists to process.
// DeviceMemory uses [ref_offset, ref_offset + ref_size) for references and
// [query_offset, query_offset + query_size) for queries. 8-byte aligned.
struct ALIGN(8) SketchSlice {
  size_t ref_offset;   // index of the first reference sample in the slice
  size_t ref_size;     // number of reference samples in the slice
  size_t query_offset; // index of the first query sample in the slice
  size_t query_size;   // number of query samples in the slice
};
|
||
// Memory on device for each operation | ||
class DeviceMemory { | ||
public: | ||
DeviceMemory(SketchStrides &ref_strides, SketchStrides &query_strides, | ||
std::vector<Reference> &ref_sketches, | ||
std::vector<Reference> &query_sketches, | ||
const SketchSlice &sample_slice, const FlatRandom &flat_random, | ||
const std::vector<uint16_t> &ref_random_idx, | ||
const std::vector<uint16_t> &query_random_idx, | ||
const std::vector<size_t> &kmer_lengths, long long dist_rows, | ||
const bool self, const int cpu_threads); | ||
|
||
~DeviceMemory(); | ||
|
||
std::vector<float> read_dists(); | ||
|
||
uint64_t *ref_sketches() { return d_ref_sketches; } | ||
uint64_t *query_sketches() { return d_query_sketches; } | ||
float *random_table() { return d_random_table; } | ||
uint16_t *ref_random() { return d_ref_random; } | ||
uint16_t *query_random() { return d_query_random; } | ||
int *kmers() { return d_kmers; } | ||
float *dist_mat() { return d_dist_mat; } | ||
|
||
private: | ||
DeviceMemory(const DeviceMemory &) = delete; | ||
DeviceMemory(DeviceMemory &&) = delete; | ||
|
||
size_t _n_dists; | ||
uint64_t *d_ref_sketches; | ||
uint64_t *d_query_sketches; | ||
float *d_random_table; | ||
uint16_t *d_ref_random; | ||
uint16_t *d_query_random; | ||
int *d_kmers; | ||
float *d_dist_mat; | ||
}; | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
|
||
#include "cuda.cuh" | ||
#include "device_reads.cuh" | ||
|
||
// Sizes and allocates the host and device buffers used to stream reads to
// the GPU in blocks.
//
// seq_ptr    source of the reads; n_full_seqs() and max_length() are
//            queried once, here.
// n_threads  not used by this constructor.
//
// buffer_size is the number of reads held per block: as many as fit in 90%
// of the device memory that is free right now, capped at the total number
// of reads. buffer_blocks is the number of blocks needed to cover every read
// at that size.
//
// Propagates whatever CUDA_CALL raises if querying memory or allocating the
// device buffer fails. A failure to register the host buffer is reported
// through CUDA_CALL_NOTHROW and construction continues.
DeviceReads::DeviceReads(const std::shared_ptr<SeqBuf> &seq_ptr,
                         const size_t n_threads)
    : d_reads(nullptr), seq(seq_ptr), n_reads(seq->n_full_seqs()),
      read_length(seq->max_length()), current_block(0), buffer_filled(0),
      loaded_first(false) {
  // Set up buffer to load in reads (on host)
  size_t mem_free = 0;
  size_t mem_total = 0;
  CUDA_CALL(cudaMemGetInfo(&mem_free, &mem_total));

  // An empty input reports a maximum length of zero; treat it as one byte
  // per read so the division below is always defined
  const size_t bytes_per_read =
      (read_length > 0 ? read_length : 1) * sizeof(char);
  buffer_size = static_cast<size_t>(mem_free * 0.9) / bytes_per_read;

  if (buffer_size >= n_reads) {
    // Everything fits in a single block
    buffer_size = n_reads;
    buffer_blocks = 1;
  } else {
    // Not even one read fits in the budget: still ask for one read's worth
    // so the device allocation below reports the real out-of-memory error
    // instead of silently looping over empty blocks
    if (buffer_size == 0) {
      buffer_size = 1;
    }
    // Ceiling division, so a final partial block is never dropped
    buffer_blocks = n_reads / buffer_size + (n_reads % buffer_size != 0);
  }

  host_buffer.resize(buffer_size * read_length);
  CUDA_CALL_NOTHROW(cudaHostRegister(host_buffer.data(),
                                     host_buffer.size() * sizeof(char),
                                     cudaHostRegisterDefault));

  // Buffer to store reads (on device)
  try {
    CUDA_CALL(cudaMalloc((void **)&d_reads,
                         buffer_size * read_length * sizeof(char)));
  } catch (...) {
    // The destructor will not run, so undo the host registration here
    // before host_buffer's storage is released
    CUDA_CALL_NOTHROW(cudaHostUnregister(host_buffer.data()));
    throw;
  }
}
|
||
// Undoes the constructor: removes the CUDA registration of the host buffer,
// then frees the device buffer. Both calls use the non-throwing check.
DeviceReads::~DeviceReads() {
  char *const registered_host = host_buffer.data();
  CUDA_CALL_NOTHROW(cudaHostUnregister(registered_host));
  CUDA_CALL_NOTHROW(cudaFree(d_reads));
}
|
||
bool DeviceReads::next_buffer() { | ||
bool success; | ||
if (current_block < buffer_blocks) { | ||
if (buffer_blocks > 1 || !loaded_first) { | ||
size_t start = current_block * buffer_size; | ||
size_t end = (current_block + 1) * buffer_size; | ||
if (end > seq->n_full_seqs()) { | ||
end = seq->n_full_seqs(); | ||
} | ||
buffer_filled = end - start; | ||
|
||
seq->load_seqs(host_buffer, start, end); | ||
CUDA_CALL(cudaMemcpyAsync(d_reads, host_buffer.data(), | ||
buffer_filled * read_length * sizeof(char), | ||
cudaMemcpyDefault)); | ||
loaded_first = true; | ||
} | ||
current_block++; | ||
success = true; | ||
} else { | ||
success = false; | ||
} | ||
return success; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
#pragma once | ||
|
||
#include <memory> // shared_ptr | ||
#include <vector> | ||
|
||
#include "sketch/seqio.hpp" // SeqBuf | ||
|
||
class DeviceReads { | ||
public: | ||
DeviceReads(const std::shared_ptr<SeqBuf> &seq_in, const size_t n_threads); | ||
~DeviceReads(); | ||
|
||
bool next_buffer(); | ||
|
||
void reset_buffer() { current_block = 0; } | ||
char *read_ptr() { return d_reads; } | ||
size_t buffer_count() const { return buffer_filled; } | ||
size_t length() const { return read_length; } | ||
|
||
private: | ||
// delete copy and move to avoid accidentally using them | ||
DeviceReads(const DeviceReads &) = delete; | ||
DeviceReads(DeviceReads &&other) = delete; | ||
|
||
char *d_reads; | ||
std::vector<char> host_buffer; | ||
std::shared_ptr<SeqBuf> seq; | ||
|
||
size_t n_reads; | ||
size_t read_length; | ||
size_t buffer_size; | ||
size_t buffer_blocks; | ||
size_t current_block; | ||
size_t buffer_filled; | ||
bool loaded_first; | ||
}; |
Oops, something went wrong.