Merge pull request #72 from bacpop/v2.0.0_candidate
Update to v2: new CLI
johnlees authored Apr 29, 2022
2 parents e5b11eb + 1e8cb9b commit 9c416f7
Showing 41 changed files with 1,585 additions and 1,152 deletions.
103 changes: 59 additions & 44 deletions CMakeLists.txt
@@ -20,27 +20,33 @@ set(TARGET_NAME pp_sketchlib)
add_compile_definitions(PYTHON_EXT)

# gcc: Add openmp
# gcc: Add -O0 to remove optimizations when using debug
IF(CMAKE_COMPILER_IS_GNUCC)
IF(CMAKE_COMPILER_IS_GNUCC OR "$ENV{SKETCHLIB_INSTALL}" STREQUAL "conda")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0")
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0")
ENDIF(CMAKE_COMPILER_IS_GNUCC)
ENDIF()

# Add -O0 to remove optimizations when using debug
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0")
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0")

if(UNIX AND NOT APPLE)
if(CMAKE_CXX_COMPILER STREQUAL "icpc")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fast -xCASCADELAKE -DMKL_ILP64 -m64 -static-intel")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fast -march=\"native\" -DMKL_ILP64 -m64 -static-intel")
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS")
set(CMAKE_LD_FLAGS "${CMAKE_LDFLAGS} -Wl,--as-needed")
endif()
endif()

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -ffast-math -funroll-loops -m64")

# Set paths for non standard lib/ and include/ locations
if(DEFINED ENV{CONDA_PREFIX})
include_directories($ENV{CONDA_PREFIX}/include)
link_directories($ENV{CONDA_PREFIX}/lib)
link_directories($ENV{CONDA_PREFIX}/lib/intel64)
else()
find_package(OpenMP)
endif()

# Add libraries
@@ -50,12 +56,12 @@ include_directories(${HDF5_INCLUDE_DIRS})
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/vendor/highfive/include)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src)

find_package(ZLIB)
execute_process(COMMAND pybind11-config --cmakedir OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE pybind11_DIR)
find_package(pybind11 2.6 CONFIG REQUIRED)
find_package(Eigen3 3.3 REQUIRED NO_MODULE)
find_package(Armadillo REQUIRED)
include_directories(${ARMADILLO_INCLUDE_DIRS})
#find_package(OpenMP) # This links system openmp if present - conda sorts out rpath but take care

# Define python library target
add_library("${TARGET_NAME}" MODULE)
@@ -64,42 +70,46 @@ add_library("${TARGET_NAME}" MODULE)
include(CheckLanguage)
check_language(CUDA)
if(CMAKE_CUDA_COMPILER)
message(STATUS "CUDA found, compiling both GPU and CPU code")
enable_language(CUDA)

# PIC/relocatable-device-code needed as this is linked by gcc later
# -Xptxas -dlcm=ca turns cache on, but not needed in recent nvcc versions
# --cudart static: static linking of the CUDA libraries
# -gencode arch=compute_35 etc compiles for each (minimum) device version listed (v3.5, v5.0, v7.5)
set(CUDA_OPTS "-Xcompiler -fPIC -Xptxas -dlcm=ca --relocatable-device-code=true --expt-relaxed-constexpr")
# Turn on link time optimisation if available
if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER 11.0 AND CMAKE_BUILD_TYPE MATCHES Release)
string(APPEND CUDA_OPTS " -dlto -arch=sm_86")
if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER 11.0)
message(STATUS "CUDA >11.0 found, compiling both GPU and CPU code")

# PIC/relocatable-device-code needed as this is linked by gcc later
# -Xptxas -dlcm=ca turns cache on, but not needed in recent nvcc versions
# --cudart static: static linking of the CUDA libraries
# -gencode arch=compute_35 etc compiles for each (minimum) device version listed (v3.5, v5.0, v7.5)
set(CUDA_OPTS "-Xcompiler -fPIC -Xptxas -dlcm=ca --relocatable-device-code=true --expt-relaxed-constexpr")
# Turn on link time optimisation if available
if(CMAKE_BUILD_TYPE MATCHES Release)
string(APPEND CUDA_OPTS " -dlto -arch=sm_86")
else()
string(APPEND CUDA_OPTS " -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86")
endif()
if(CMAKE_BUILD_TYPE MATCHES Debug)
string(APPEND CUDA_OPTS " -G")
endif()

set(CMAKE_CUDA_FLAGS "${CUDA_OPTS}")

add_compile_definitions(GPU_AVAILABLE)
add_library("${TARGET_NAME}_CUDA" OBJECT src/gpu/dist.cu
src/gpu/sketch.cu
src/gpu/device_memory.cu
src/gpu/gpu_countmin.cu
src/gpu/device_reads.cu)
target_include_directories("${TARGET_NAME}_CUDA" PRIVATE "${EIGEN3_INCLUDE_DIR}" "${pybind11_INCLUDE_DIRS}")
set_property(TARGET "${TARGET_NAME}_CUDA"
PROPERTY POSITION_INDEPENDENT_CODE ON
CUDA_SEPARABLE_COMPILATION ON
CUDA_RESOLVE_DEVICE_SYMBOLS ON # try and ensure device link with nvcc
CUDA_VISIBILITY_PRESET "hidden"
CUDA_RUNTIME_LIBRARY Static)
#CUDA_ARCHITECTURES OFF) # set off as done explicitly above (due to dlto complexities)
# CPU code/gcc compiled code needed by cuda lib
target_sources("${TARGET_NAME}" PRIVATE src/gpu/gpu_api.cpp)
else()
string(APPEND CUDA_OPTS " -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86")
endif()
if(CMAKE_BUILD_TYPE MATCHES Debug)
string(APPEND CUDA_OPTS " -G")
message(STATUS "CUDA >=11.0 required, compiling CPU code only")
endif()

set(CMAKE_CUDA_FLAGS "${CUDA_OPTS}")

add_compile_definitions(GPU_AVAILABLE)
add_library("${TARGET_NAME}_CUDA" OBJECT src/gpu/dist.cu
src/gpu/sketch.cu
src/gpu/device_memory.cu
src/gpu/gpu_countmin.cu
src/gpu/device_reads.cu)
target_include_directories("${TARGET_NAME}_CUDA" PRIVATE "${EIGEN3_INCLUDE_DIR}" "${pybind11_INCLUDE_DIRS}")
set_property(TARGET "${TARGET_NAME}_CUDA"
PROPERTY POSITION_INDEPENDENT_CODE ON
CUDA_SEPARABLE_COMPILATION ON
CUDA_RESOLVE_DEVICE_SYMBOLS ON # try and ensure device link with nvcc
CUDA_VISIBILITY_PRESET "hidden"
CUDA_RUNTIME_LIBRARY Static)
#CUDA_ARCHITECTURES OFF) # set off as done explicitly above (due to dlto complexities)
# CPU code/gcc compiled code needed by cuda lib
target_sources("${TARGET_NAME}" PRIVATE src/gpu/gpu_api.cpp)
else()
message(STATUS "CUDA not found, compiling CPU code only")
endif()
@@ -133,7 +143,12 @@ if(CMAKE_CUDA_COMPILER)
set_property(TARGET "${TARGET_NAME}" PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
#set_property(TARGET "${TARGET_NAME}" PROPERTY CUDA_ARCHITECTURES OFF)
endif()
target_link_libraries("${TARGET_NAME}" PRIVATE pybind11::module Eigen3::Eigen z ${HDF5_LIBRARIES} gomp openblas lapack gfortran m dl)
#if(OpenMP_CXX_FOUND)
# target_link_libraries("${TARGET_NAME}" PRIVATE OpenMP::OpenMP_CXX)
#endif()
target_link_libraries("${TARGET_NAME}" PRIVATE pybind11::module Eigen3::Eigen ${HDF5_LIBRARIES} openblas lapack gfortran m dl)
if(DEFINED ENV{CONDA_PREFIX} AND (NOT APPLE OR CMAKE_COMPILER_IS_GNUCC OR "$ENV{SKETCHLIB_INSTALL}" STREQUAL "conda"))
target_link_libraries("${TARGET_NAME}" PRIVATE gomp z)
else()
target_link_libraries("${TARGET_NAME}" PRIVATE ZLIB::ZLIB)
if(OpenMP_CXX_FOUND)
target_link_libraries("${TARGET_NAME}" PRIVATE OpenMP::OpenMP_CXX)
endif()
endif()
119 changes: 25 additions & 94 deletions README.md
@@ -1,12 +1,13 @@
# pp-sketchlib <img src='sketchlib_logo.png' align="right" height="139" />

<!-- badges: start -->
[![Build status](https://dev.azure.com/jlees/pp-sketchlib/_apis/build/status/johnlees.pp-sketchlib?branchName=master)](https://dev.azure.com/jlees/pp-sketchlib/_build/latest?definitionId=1&branchName=master)
[![Build Status](https://dev.azure.com/jlees/pp-sketchlib/_apis/build/status/bacpop.pp-sketchlib?branchName=master)](https://dev.azure.com/jlees/pp-sketchlib/_build/latest?definitionId=4&branchName=master)
[![Build status](https://badge.buildkite.com/b1bc9ccd16211ca5a55846b95e297554e5aa3b544d8cb752b0.svg?branch=master;theme=github)](https://buildkite.com/mrc-ide/pp-sketchlib)
[![Anaconda package](https://anaconda.org/conda-forge/pp-sketchlib/badges/version.svg)](https://anaconda.org/conda-forge/pp-sketchlib)
<!-- badges: end -->


Library of sketching functions used by [PopPUNK](https://www.poppunk.net).
Library of sketching functions used by [PopPUNK](https://www.poppunk.net). See documentation at http://poppunk.readthedocs.io/en/latest/sketching.html

## Installation

@@ -67,7 +68,7 @@ installed (tested on 10.2 and 11.0).
Create a set of sketches and save these as a database:

```
poppunk_sketch --sketch --rfile rfiles.txt --ref-db listeria --sketch-size 10000 --cpus 4 --min-k 15 --k-step 2
sketchlib sketch -l rfiles.txt -o listeria --cpus 4
```

The input file `rfiles.txt` has one sequence per line. The first column is the sample name, subsequent tab-separated
@@ -79,52 +80,47 @@ sample2 sample2.fa
sample3 sample3_1.fq.gz sample3_2.fq.gz
```
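
A list file like this can be generated with a few lines of scripting. As a minimal Python sketch (the `assemblies/` directory and the use of each file's stem as the sample name are illustrative assumptions):

```
from pathlib import Path

# Write a tab-separated rfiles.txt: sample name, then sequence file(s).
# Assumes one assembly per sample, stored as assemblies/<sample>.fa
with open("rfiles.txt", "w") as out:
    for fasta in sorted(Path("assemblies").glob("*.fa")):
        out.write(f"{fasta.stem}\t{fasta}\n")
```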

Calculate core and accessory distances between databases with `--query`. If all-vs-all, only the upper triangle is calculated,
Calculate core and accessory distances between databases with `query dist`. If all-vs-all, only the upper triangle is calculated,
for example:

```
poppunk_sketch --query --ref_db listeria --query_db listeria --cpus 4
sketchlib query dist listeria --cpus 4
```

This will save output files as a database for use with PopPUNK. If you wish to output the
distances add the `--print` option:

```
poppunk_sketch --query --ref_db listeria --query_db listeria --cpus 4 --print > distances.txt
```
This will print the distances to STDOUT, which can be captured with `>`. If you wish to save the output as a database for use with PopPUNK, add the `-o` option.
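
For example, a small Python wrapper can run the query and capture the printed distances to a file (a sketch only: the database name `listeria` and the output path are placeholders):

```
import subprocess

# Run the all-vs-all distance calculation and save the distances printed to STDOUT
result = subprocess.run(
    ["sketchlib", "query", "dist", "listeria", "--cpus", "4"],
    capture_output=True, text=True, check=True,
)
with open("distances.txt", "w") as out:
    out.write(result.stdout)
```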

### Other options

Sketching:

- `--strand` ignores reverse complement k-mers, if input is all in the same sense
- `--single-strand` ignores reverse complement k-mers, if input is all in the same sense
- `--min-count` minimum k-mer count to include when using reads
- `--exact-counter` uses a hash table to count k-mers, which is recommended for non-bacterial datasets.

Query:

- To only use some of the samples in the sketch database, you can add the `--subset` option with a file which lists the required sample names.
- `--jaccard` will output the Jaccard distances, rather than core and accessory distances.
- `query jaccard` will output the Jaccard distances, rather than core and accessory distances.
- `query sparse` will output a sparse distance matrix,
using either a distance `--threshold` or the k-nearest neighbours (`-kNN`).

### Large datasets

When working with large datasets, you can increase the `--cpus` to high numbers and get
a roughly proportional performance increase.

If you are calculating sketches of read datasets, or large numbers of distances, and have a CUDA-compatible GPU,
you can calculate distances on your graphics device even more quickly. Add the `--use-gpu` option:
you can calculate distances on your graphics device even more quickly. Add the `--gpu` option with the desired
device ID:

```
poppunk_sketch --sketch --rfile rfiles.txt --ref-db listeria --cpus 4 --use-gpu
poppunk_sketch --query --ref-db listeria --query-db listeria --use-gpu
sketchlib sketch -l rfiles.txt -o listeria --cpus 4 --gpu 0
sketchlib query dist listeria --gpu 0
```

Both CPU parallelism and the GPU will be used, so be sure to add
both `--cpus` and `--use-gpu` for maximum speed. This is particularly efficient
when sketching.

You can set the `--gpu-id` if you have more than one device, which may be necessary on
cluster systems. This mode can also benefit from having multiple CPU cores available too.
both `--cpus` and `--gpu` for maximum speed. This is particularly efficient
when sketching reads.

### Benchmarks

@@ -193,12 +189,16 @@ contain `sketch` and may contain `random`. Run `h5dump` to see the full contents
Contents are programmatically accessible with any HDF5 API. See `__main__.py` for an
example in python.

See `poppunk_db_info` from the [PopPUNK](https://github.com/johnlees/PopPUNK) package for pretty printing.

#### sketch

Attributes:

- `sketch_version` - version of sketching code used to create the database.
The SHA1 hash of relevant code files (doesn't change with every commit).
- `codon_phased` - 1 if codon-phased seeds were used.
- `reverse_complement` - 0 if `--single-strand`.

Contains a group for each sample, within each has attributes:

@@ -230,78 +230,6 @@ Datasets:
- `table_keys` - sample order of `table_values`.
- `table_values` - centroid ID assigned to each sample.

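As a rough illustration of this layout, the file can be opened with `h5py` (one of many HDF5 APIs; a hedged sketch using the group, attribute and dataset names listed above, with `listeria.h5` as a placeholder file name):

```
import h5py

with h5py.File("listeria.h5", "r") as db:
    sketches = db["sketch"]
    # Database-level attributes described above
    print("sketch_version:", sketches.attrs["sketch_version"])
    print("codon_phased:", sketches.attrs["codon_phased"])
    # One group per sample
    print("samples:", list(sketches.keys()))
    # Optional random-match tables
    if "random" in db:
        random_grp = db["random"]
        centroids = dict(zip(random_grp["table_keys"][()],
                             random_grp["table_values"][()]))
        print("centroid assignments:", centroids)
```
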
C++
---
I have yet to set up a proper namespace for this, but you can include this
code (`api.hpp` will do most functions) and use the parts you need. If you
are interested in this becoming more functional, please raise an issue.

See `main.cpp` for examples:

```
#include <fstream>
#include <iostream>
#include "reference.hpp"
#include "database.hpp"
#include "random_match.hpp"
#include "api.hpp"
// Set k-mer lengths
std::vector<size_t> kmer_lengths {15, 17, 19, 21, 23, 25, 27, 29};
// Create two sketches
Reference ref(argv[1], {argv[2]}, kmer_lengths, 156, true, 0, false);
Reference query(argv[3], {argv[4]}, kmer_lengths, 156, true, 0, false);
// Use default random match chances
RandomMC random(true);
// Output some distances at a single k-mer length
std::cout << ref.jaccard_dist(query, 15, random) << std::endl;
std::cout << ref.jaccard_dist(query, 29, random) << std::endl;
// Calculate core and accessory distances between two sketches
auto core_acc = ref.core_acc_dist<RandomMC>(query, random);
std::cout << std::get<0>(core_acc) << "\t" << std::get<1>(core_acc) << std::endl;
// Save sketches to file
Database sketch_db("sketch.h5");
sketch_db.add_sketch(ref);
sketch_db.add_sketch(query);
// Read sketches from file
Reference ref_read = sketch_db.load_sketch(argv[1]);
Reference query_read = sketch_db.load_sketch(argv[3]);
// Create sketches using multiple threads, saving to file
std::vector<Reference> ref_sketches = create_sketches("full",
{argv[1], argv[3]},
{{argv[2]}, {argv[4]}},
kmer_lengths,
156,
true,
0,
false,
2);
// Calculate distances between sketches using multiple threads
MatrixXf dists = query_db(ref_sketches,
ref_sketches,
kmer_lengths,
random,
false,
2);
std::cout << dists << std::endl;
// Read sketches from an existing database, using random access
HighFive::File h5_db("listeria.h5");
Database listeria_db(h5_db);
std::vector<Reference> listeria_sketches;
// "names" is a std::vector<std::string> of sample names to load (definition not shown here)
for (auto name_it = names.cbegin(); name_it != names.cend(); name_it++)
{
listeria_sketches.push_back(listeria_db.load_sketch(*name_it));
}
```

## Algorithms

### Sketching
@@ -363,7 +291,7 @@ Blais & Blanchette is used (formula 6 in the paper cited below).
sketch each separately and join the databases.
- GPU sketching filters out any read containing an N, which may give slightly
different results from the CPU code.
- GPU sketching with variable read lengths is untested, but theoretically supported.
- GPU sketching with variable read lengths is unsupported. Illumina data only for now!
- GPU distances use lower precision than the CPU code, so slightly different results
are expected.

@@ -427,6 +355,9 @@ Modifiers:
- `PROFILE=1` runs with profiler flags for `ncu` and `nsys`
- `GPU=1` also build CUDA code (assumes `/usr/local/cuda-11.1/` and SM v8.6)

### Azure
The repository key for the Ubuntu CUDA install is periodically updated, which may cause build failures. See https://developer.nvidia.com/blog/updating-the-cuda-linux-gpg-repository-key/ and update the key in `azure-pipelines.yml`.

### Test that Python can build an installable package

Build a Python source package and install it into an empty Docker container with vanilla Python 3. If this works, there's a good chance that the version uploaded to PyPI will work.
4 changes: 2 additions & 2 deletions azure-pipelines.yml
@@ -29,7 +29,7 @@ steps:
- script: |
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-ubuntu1804.pin
sudo mv cuda-ubuntu1804.pin /etc/apt/preferences.d/cuda-repository-pin-600
sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub
sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub
sudo add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/ /"
sudo apt-get update
sudo apt-get -y install cuda=11.2.2-1
@@ -54,5 +54,5 @@ steps:
export CUDA_HOME=/usr/local/cuda-11.2
export PATH=${CUDA_HOME}/bin${PATH:+:${PATH}}
export LD_LIBRARY_PATH=${CUDA_HOME}/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
cd test && python run_test.py --no-cpp
cd test && python run_test.py
displayName: 'Run tests (run_test.py)'
2 changes: 1 addition & 1 deletion docker/test
@@ -8,4 +8,4 @@ HERE=$(dirname $0)
[ ! -z $(docker images -q $TAG_SHA) ] || docker pull $TAG_SHA

## Just check that we can bring up the container and run something
docker run -it -w /src --rm $TAG_SHA poppunk_sketch --version
docker run -it -w /src --rm $TAG_SHA sketchlib --version
3 changes: 2 additions & 1 deletion environment.yml
@@ -9,6 +9,7 @@ dependencies:
- pip
- numpy
- scipy
- docopt
- cmake >= 3.12
- pybind11
- zlib
@@ -21,4 +22,4 @@ dependencies:
- armadillo
- libgfortran-ng
- nvcc_linux-64
- cudatoolkit==11.2
- cudatoolkit==11.2 # This is pinned due to version install on azure, see azure-pipelines.yml
2 changes: 1 addition & 1 deletion pp_sketch/__init__.py
@@ -3,4 +3,4 @@

'''PopPUNK sketching functions'''

__version__ = '1.7.6.2'
__version__ = '2.0.0'
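
The bumped version is exposed to Python; a quick check (assuming the package is installed and imports as `pp_sketch`, as the file path above suggests):

```
import pp_sketch

# Should print 2.0.0 for this release
print(pp_sketch.__version__)
```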