Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue 114 #121

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 22 additions & 8 deletions src/call_consensus_pileup.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@ bool compare_allele_depth(const allele &a, const allele &b){
return b.depth < a.depth;
}

ret_t get_consensus_allele(std::vector<allele> ad, uint8_t min_qual, double threshold, char gap, double min_insert_threshold){
//print_allele_depths(ad);
ret_t get_consensus_allele(std::vector<allele> ad, uint8_t min_qual, double threshold, char gap, double min_insert_threshold, double insert_threshold){
//insert threshold refers to insertion > 1bp in length
//min insert threshold refers to 1bp SNV
ret_t t;
t.nuc=gap;
t.q = min_qual+33;
Expand All @@ -32,7 +33,8 @@ ret_t get_consensus_allele(std::vector<allele> ad, uint8_t min_qual, double thre
std::vector<allele> nuc_pos;
allele tmp_a;
char n;
uint32_t max_l = 0, max_depth = 0, tmp_depth = 0, cur_depth = 0, total_max_depth = 0, gap_depth = 0, total_indel_depth = 0;

uint32_t max_l = 0, max_depth = 0, tmp_depth = 0, cur_depth = 0, total_max_depth = 0, gap_depth = 0, total_indel_depth = 0, insert_gap_depth=0;
uint8_t ambg_n = 1, ctr = 0;
double q = 0, tq = 0, cur_threshold = 0;
std::vector<allele>::iterator it;
Expand Down Expand Up @@ -124,12 +126,22 @@ ret_t get_consensus_allele(std::vector<allele> ad, uint8_t min_qual, double thre
//print_allele_depths(ad);
//std::cout << "\n";
//gap_depth = (total_indel_depth > cur_depth) ? total_indel_depth - cur_depth : 0;
//this describes the threshold for an SNV
gap_depth = min_insert_threshold * total_max_depth;
//std::cout << max_depth << " " << gap_depth << " " << min_insert_threshold << " " << total_max_depth<<" \n";
}
//this describes the threshold for an insertion > 1bp
insert_gap_depth = insert_threshold * total_max_depth;
}
else
gap_depth = 0; // For first position of allele
if(n!='*' && max_depth >= gap_depth){ // TODO: Check what to do when equal.{

//if we have multiple insertions in a row, we handle it differently
if(max_l > 2 && n!='*' && max_depth >= insert_gap_depth){
t.nuc += n;
q += 0.5; // For rounding before converting to int
t.q += (((uint8_t)q)+33);

}
else if(max_l <= 2 && n!='*' && max_depth >= gap_depth){ // TODO: Check what to do when equal.
t.nuc += n;
q += 0.5; // For rounding before converting to int
t.q += (((uint8_t)q)+33);
Expand All @@ -138,7 +150,9 @@ ret_t get_consensus_allele(std::vector<allele> ad, uint8_t min_qual, double thre
return t;
}

int call_consensus_from_plup(std::istream &cin, std::string seq_id, std::string out_file, uint8_t min_qual, double threshold, uint8_t min_depth, char gap, bool min_coverage_flag, double min_insert_threshold){
int call_consensus_from_plup(std::istream &cin, std::string seq_id, std::string out_file, uint8_t min_qual, double threshold, uint8_t min_depth, char gap, bool min_coverage_flag, double min_insert_threshold, double insert_threshold){
// min_insert is for SNV insertion
// insert_threshold is for longer than 1bp insertions
std::string line, cell;
std::ofstream fout((out_file+".fa").c_str());
std::ofstream tmp_qout((out_file+".qual.txt").c_str());
Expand Down Expand Up @@ -196,7 +210,7 @@ int call_consensus_from_plup(std::istream &cin, std::string seq_id, std::string
ret_t t;
if(mdepth >= min_depth){
ad = update_allele_depth(ref, bases, qualities, min_qual);
t = get_consensus_allele(ad, min_qual, threshold, gap, min_insert_threshold);
t = get_consensus_allele(ad, min_qual, threshold, gap, min_insert_threshold, insert_threshold);
fout << t.nuc;
tmp_qout << t.q;
} else{
Expand Down
4 changes: 2 additions & 2 deletions src/call_consensus_pileup.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ struct ret_t {
};

void format_alleles(std::vector<allele> &ad);
int call_consensus_from_plup(std::istream &cin, std::string seq_id, std::string out_file, uint8_t min_qual, double threshold, uint8_t min_depth, char gap, bool min_coverage_flag, double min_insert_threshold);
ret_t get_consensus_allele(std::vector<allele> ad, uint8_t min_qual, double threshold, char gap, double min_insert_threshold);
int call_consensus_from_plup(std::istream &cin, std::string seq_id, std::string out_file, uint8_t min_qual, double threshold, uint8_t min_depth, char gap, bool min_coverage_flag, double min_insert_threshold, double insert_threshold);
ret_t get_consensus_allele(std::vector<allele> ad, uint8_t min_qual, double threshold, char gap, double min_insert_threshold, double insert_threshold);

#endif
19 changes: 13 additions & 6 deletions src/ivar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ struct args_t {
uint8_t min_qual; // -q
uint8_t sliding_window; // -s
double min_threshold; // -t
double min_insert_threshold; // -c
double insert_threshold; // -c
double snv_threshold; // -z
int min_length; // -m
std::string f1; // -1
std::string f2; // -2
Expand Down Expand Up @@ -120,6 +121,7 @@ void print_consensus_usage(){
" 0.9 | Strict or bases that make up atleast 90% of the depth at a position\n"
" 1 | Identical or bases that make up 100% of the depth at a position. Will have highest ambiguities\n"
" -c Minimum insertion frequency threshold(0 - 1) to call consensus. (Default: 0.8)\n"
" -z Minimum threshold(0-1) for SNV insertion to call consensus. (Deagult: 0.8)\n"
" Frequently used thresholds | Description\n"
" ---------------------------|------------\n"
" 0 | Allow insertion if it appears even once\n"
Expand Down Expand Up @@ -178,7 +180,7 @@ void print_version_info(){

static const char *trim_opt_str = "i:b:f:x:p:m:q:s:ekh?";
static const char *variants_opt_str = "p:t:q:m:r:g:h?";
static const char *consensus_opt_str = "i:p:q:t:c:m:n:kh?";
static const char *consensus_opt_str = "i:p:q:t:c:z:m:n:kh?";
static const char *removereads_opt_str = "i:p:t:b:h?";
static const char *filtervariants_opt_str = "p:t:f:h?";
static const char *getmasked_opt_str = "i:b:f:p:h?";
Expand Down Expand Up @@ -348,14 +350,18 @@ int main(int argc, char* argv[]){
g_args.gap = 'N';
g_args.min_qual = 20;
g_args.keep_min_coverage = true;
g_args.min_insert_threshold = 0.8;
g_args.insert_threshold = 0.8;
g_args.snv_threshold = 0.8;
while( opt != -1 ) {
switch( opt ) {
case 't':
g_args.min_threshold = atof(optarg);
break;
case 'c':
g_args.min_insert_threshold = atof(optarg);
g_args.insert_threshold = atof(optarg);
break;
case 'z':
g_args.snv_threshold = atof(optarg);
break;
case 'i':
g_args.seq_id = optarg;
Expand Down Expand Up @@ -399,12 +405,13 @@ int main(int argc, char* argv[]){
std::cout <<"Minimum Quality: " << (uint16_t) g_args.min_qual << std::endl;
std::cout << "Threshold: " << g_args.min_threshold << std::endl;
std::cout << "Minimum depth: " << (unsigned) g_args.min_depth << std::endl;
std::cout << "Minimum Insert Threshold: " << g_args.min_insert_threshold << std::endl;
std::cout << "Minimum SNV Threshold: " << g_args.snv_threshold << std::endl;
std::cout << "Minimum Insert Threshold: " << g_args.insert_threshold << std::endl;
if(!g_args.keep_min_coverage)
std::cout << "Regions with depth less than minimum depth will not added to consensus" << std::endl;
else
std::cout << "Regions with depth less than minimum depth covered by: " << g_args.gap << std::endl;
res = call_consensus_from_plup(std::cin, g_args.seq_id, g_args.prefix, g_args.min_qual, g_args.min_threshold, g_args.min_depth, g_args.gap, g_args.keep_min_coverage, g_args.min_insert_threshold);
res = call_consensus_from_plup(std::cin, g_args.seq_id, g_args.prefix, g_args.min_qual, g_args.min_threshold, g_args.min_depth, g_args.gap, g_args.keep_min_coverage, g_args.snv_threshold, g_args.insert_threshold);
} else if (cmd.compare("removereads") == 0){
opt = getopt( argc, argv, removereads_opt_str);
while( opt != -1 ) {
Expand Down
8 changes: 4 additions & 4 deletions tests/test_call_consensus_from_plup.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,15 +76,15 @@ int main() {
for(int i = 0;i<size;i++){
ad.at(i) = arr[i];
}
s = get_consensus_allele(ad, 0, 0, 'N', 0.5);
s = get_consensus_allele(ad, 0, 0, 'N', 0.5, 0.5);
success += (s.nuc.compare("DW") == 0) ? 1: 0;
success += (s.q.compare("8+") == 0) ? 1 : 0;
ad.push_back(a6);
s = get_consensus_allele(ad, 0, 0, 'N', 0);
s = get_consensus_allele(ad, 0, 0, 'N', 0, 0);
success += (s.nuc.compare("DAS") == 0) ? 1: 0;
success += (s.q.compare("8++") == 0) ? 1 : 0;
ad.push_back(a7);
s = get_consensus_allele(ad, 0, 0, 'N', 0);
s = get_consensus_allele(ad, 0, 0, 'N', 0, 0);
success += (s.nuc.compare("DAB") == 0) ? 1: 0;
success += (s.q.compare("8++") == 0) ? 1 : 0;
allele a8 = {
Expand Down Expand Up @@ -127,7 +127,7 @@ int main() {
allele del_arr[] = {a8, a9, a10, a11};
ret_t del_s;
std::vector<allele> del_ad(del_arr, del_arr+sizeof(del_arr)/sizeof(allele));
s = get_consensus_allele(del_ad, 0, 0, 'N', 1);
s = get_consensus_allele(del_ad, 0, 0, 'N', 1, 1);
success += (s.nuc.compare("") == 0) ? 1: 0;
success += (s.q.compare("") == 0) ? 1 : 0;
return (success == num_tests) ? 0 : -1;
Expand Down
2 changes: 1 addition & 1 deletion tests/test_consensus_min_depth.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
int call_cns_check_outfile(std::string input_id, std::string prefix, std::string cns, char gap, bool call_min_depth, int min_depth){
std::string path = "../data/test.gap.sorted.mpileup";
std::ifstream mplp(path);
call_consensus_from_plup(mplp, input_id, prefix, 20, 0, min_depth, gap, call_min_depth, 1);
call_consensus_from_plup(mplp, input_id, prefix, 20, 0, min_depth, gap, call_min_depth, 1, 1);
std::ifstream outFile(prefix+".fa");
std::string l;
getline(outFile, l); // Ignore first line
Expand Down
56 changes: 51 additions & 5 deletions tests/test_consensus_min_insert_threshold.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#include "../src/allele_functions.h"

int main() {
int num_tests = 8;
int num_tests = 10;
allele a1 = {
"T",
8,
Expand Down Expand Up @@ -40,25 +40,71 @@ int main() {
ad.at(i) = arr[i];
}

s = get_consensus_allele(ad,20,.8, 'N', 0);
s = get_consensus_allele(ad,20,.8, 'N', 0, 0);
std::cout << s.nuc << ": " << s.q << std::endl;
success += (s.nuc.compare("TN") == 0) ? 1: 0;
success += (s.q.compare("??") == 0) ? 1 : 0;

s = get_consensus_allele(ad,20,.8, 'N', 1);
s = get_consensus_allele(ad,20,.8, 'N', 1, 1);
std::cout << s.nuc << ": " << s.q << std::endl;
success += (s.nuc.compare("T") == 0) ? 1: 0;
success += (s.q.compare("?") == 0) ? 1 : 0;

s = get_consensus_allele(ad,20,.8, 'N',.6);
s = get_consensus_allele(ad,20,.8, 'N',.6, 0.6);
std::cout << s.nuc << ": " << s.q << std::endl;
success += (s.nuc.compare("TN") == 0) ? 1: 0;
success += (s.q.compare("??") == 0) ? 1 : 0;

s = get_consensus_allele(ad,20,.8, 'N',.8);
s = get_consensus_allele(ad,20,.8, 'N',.8, 0.8);
std::cout << s.nuc << ": " << s.q << std::endl;
success += (s.nuc.compare("T") == 0) ? 1: 0;
success += (s.q.compare("?") == 0) ? 1 : 0;

//adding a test to check snv against insertion threshold
allele a3 = {
"T",
5,
0,
30,
0,
0,
0
};
allele a4 = {
"+GGGGGGG",
3,
0,
30,
0,
0,
0
};
allele a5 = {
"A",
2,
0,
30,
0,
0,
0
};

allele arr1[] = {a3, a4, a5};
std::vector<allele> ad1(arr1, arr1+sizeof(arr1)/sizeof(allele));

for(int i = 0;i<size;i++){
ad1.at(i) = arr1[i];
}

//not bothering with single insertion, should be checked elsewhere
//first param is 1bp second is >1bp
s = get_consensus_allele(ad1,20,0, 'N',0.8, 0);
std::cout << s.nuc << ": " << s.q << std::endl;
success += (s.nuc.compare("TGGGGGGG") == 0) ? 1: 0;

s = get_consensus_allele(ad1,20, 0, 'N',0, 0.8);
std::cout << s.nuc << ": " << s.q << std::endl;
success += (s.nuc.compare("T") == 0) ? 1: 0;

return (success == num_tests) ? 0 : -1;
}
2 changes: 1 addition & 1 deletion tests/test_consensus_seq_id.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ int call_cns_check_outfile(std::string seq_id, std::string prefix, char gap, boo
std::string path = "../data/test.gap.sorted.mpileup";
std::string expctd_hdr = "";
std::ifstream mplp(path);
call_consensus_from_plup(mplp, seq_id, prefix, 20, 0, min_depth, gap, call_min_depth, 1);
call_consensus_from_plup(mplp, seq_id, prefix, 20, 0, min_depth, gap, call_min_depth, 1, 1);
std::ifstream outFile(prefix+".fa");
std::string l;
getline(outFile, l); // header
Expand Down
6 changes: 3 additions & 3 deletions tests/test_consensus_threshold.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,17 +76,17 @@ int main() {
for(int i = 0;i<size;i++){
ad.at(i) = arr[i];
}
s = get_consensus_allele(ad,20,.6, 'N', 0.6);
s = get_consensus_allele(ad,20,.6, 'N', 0.6, 0.6);
std::cout << s.nuc << ": " << s.q << std::endl;
success += (s.nuc.compare("A") == 0) ? 1: 0;
success += (s.q.compare("?") == 0) ? 1 : 0;
s = get_consensus_allele(ad,20,.7, 'N', 1);
s = get_consensus_allele(ad,20,.7, 'N', 1, 1);
std::cout << s.nuc << ": " << s.q << std::endl;
success += (s.nuc.compare("N") == 0) ? 1: 0;
success += (s.q.compare("?") == 0) ? 1 : 0;
ad.erase(ad.begin() + 4, ad.begin()+5);
ad.push_back(a6);
s = get_consensus_allele(ad,20,.7, 'N',1);
s = get_consensus_allele(ad,20,.7, 'N',1, 1);
std::cout << s.nuc << ": " << s.q << std::endl;
success += (s.nuc.compare("R") == 0) ? 1: 0;
success += (s.q.compare("?") == 0) ? 1 : 0;
Expand Down