Skip to content

Commit b84daec

Browse files
Shriram BhosleShriram Bhosle
authored andcommitted
add virus detection
1 parent ab826cf commit b84daec

File tree

3 files changed

+85
-11
lines changed

3 files changed

+85
-11
lines changed

README.md

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
1-
# my_scripts
1+
##merge2files
22

33
Script to compare and merge two files based on user specified matched columns.
44
for usage:
55
merge2files.pl -h
6+
7+
##runVirusdetection
8+
* Tool to detect virus or bacterial sequences in tumour samples using GOTTCHA
9+
* Tracey Allen K. Freitas, Po-E Li, Matthew B. Scholz and Patrick S. G. Chain (2015) Accurate read-based metagenome characterization using a hierarchical suite of unique signatures, Nucleic Acids Research (DOI: 10.1093/nar/gkv180)
10+
* Download latest signature database for specific organism from : ftp://ftp.lanl.gov/public/genome/gottcha/GOTTCHA_database_v20150825/
11+
* Also needs lookup database GOTTCHA_lookup.tar.gz
12+
* runVirusdetection.sh

merge2files.pl

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,28 +9,39 @@
99

1010
my ($options) = option_builder();
1111

12+
13+
if(!-s $options->{'f1'} && !-s $options->{'f2'}) {
14+
warn "Unable to find files\n";
15+
exit;
16+
}
17+
1218
open(my $fh1, $options->{'f1'}) || warn "print $!";
1319
open(my $fh2, $options->{'f2'}) || warn "print $!";
1420

21+
22+
23+
1524
my ($f1,$d1,$s1) = fileparse($options->{'f1'},qr/\.[^.]*/);
1625
my ($f2,$d2,$s2) = fileparse($options->{'f2'},qr/\.[^.]*/);
1726

1827
my $col_f1=$options->{'c1'};
1928
my $col_f2=$options->{'c2'};
20-
$options->{'hdr'}=0 unless($options->{'hdr'});
29+
$options->{'hdr1'}=0 unless($options->{'hdr1'});
30+
$options->{'hdr2'}=0 unless($options->{'hdr2'});
2131
chomp $col_f1;
2232
chomp $col_f2;
2333

34+
2435
# create file handle for filed to write [ common columns and different columns
25-
open(my $fh_common, '>', $f1.'_'.$f2.'common'.$s1);
36+
open(my $fh_common, '>', $options->{'o'}.'/'.$f1.'_'.$f2.'common'.$s1);
2637

2738
# create hash of both the files....
2839

29-
my ($file1_hash)=get_file_hash($fh1,$col_f1,$options->{'hdr'},$f1);
30-
my ($file2_hash)=get_file_hash($fh2,$col_f2,$options->{'hdr'},$f2);
40+
my ($file1_hash)=get_file_hash($fh1,$col_f1,$options->{'hdr1'},$f1);
41+
my ($file2_hash)=get_file_hash($fh2,$col_f2,$options->{'hdr2'},$f2);
3142

3243
#combine files
33-
combine_hash($file1_hash,$file2_hash,$fh_common,$options->{'hdr'});
44+
combine_hash($file1_hash,$file2_hash,$fh_common,$options->{'hdr1'});
3445

3546
# sub to combine file data...
3647
sub combine_hash {
@@ -45,6 +56,7 @@ sub combine_hash {
4556
# write common rows
4657
foreach my $key(keys %$hash_f1) {
4758
next if $key eq 'header';
59+
next if $key eq 'pre_header_lines';
4860
if ($hash_f2->{$key}) {
4961
print $fh_combined $hash_f1->{$key}."\t".$hash_f2->{$key}."\n";
5062
$count++;
@@ -60,17 +72,17 @@ sub get_file_hash {
6072
my $count=0;
6173
while (my $line=<$fh>) {
6274
$count++;
63-
next if $line=~/^\s+\n/;
75+
next if $line=~/^\n/;
6476
$line=~s/\n//g;
6577
my ($col_key) = get_col_key($line,$columns) if $count >= $header_row;
6678
if($count <= $header_row ) {
6779
if($count==$header_row) {
6880
$file_hash->{'header'}=$line;
81+
print "Columns to compare from file: : $col_key\n" if defined $col_key;
6982
}
7083
else {
7184
$file_hash->{'pre_header_lines'}.="$line\n";
7285
}
73-
print "Columns to compare from file: : $col_key\n" if defined $col_key;
7486
next;
7587
}
7688
$file_hash->{$col_key}="$line";
@@ -113,7 +125,9 @@ sub option_builder {
113125
'f2|file2=s' => \$opts{'f2'},
114126
'c1|col1=s' => \$opts{'c1'},
115127
'c2|col2=s' => \$opts{'c2'},
116-
'hdr|header=s' => \$opts{'hdr'},
128+
'o|output=s' => \$opts{'o'},
129+
'hdr1|header1=s' => \$opts{'hdr1'},
130+
'hdr2|header2=s' => \$opts{'hdr2'},
117131
'v|version' => \$opts{'v'},
118132
);
119133

@@ -129,7 +143,7 @@ =head1 NAME
129143
130144
=head1 SYNOPSIS
131145
132-
merge2files.pl [-h] -f1 -f2 -c1 -c2 [-hdr]
146+
merge2files.pl [-h] -f1 -f2 -c1 -c2 [-hdr1 -hdr2]
133147
134148
Required Options:
135149
--help (-h) This message and format of input file
@@ -138,7 +152,9 @@ =head1 SYNOPSIS
138152
--file2 (-f2) file2
139153
--col1 (-c1) comma separated column numbers fom file1
140154
--col2 (-c2) comma separated columns numbers fom file2
141-
--header (-hdr) header row number
155+
--header1 (-hdr1) header row number first file
156+
--header2 (-hdr2) header row number second file
157+
--output (-o) output folder
142158
143159
=cut
144160

runVirusDetection.sh

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
#!/bin/bash
2+
set -ue
3+
##################
4+
##Author: sb43 #
5+
##Date:9/02/2016 #
6+
##################
7+
in_bam=${1:-}
8+
out_dir=${2:-}
9+
num_threads=${3:-}
10+
11+
usage ()
12+
{
13+
echo 'Usage : runVirusDetection.sh <BAM_FILE> <OUTDIR> <number_of_cpu>'
14+
exit
15+
}
16+
17+
if [[ -z "$in_bam" || -z "$out_dir" || -z "$num_threads" ]]
18+
then
19+
usage
20+
fi
21+
22+
mkdir -p $out_dir/tmp
23+
24+
echo "Feteching unmapped reads......"
25+
#Unmapped read whose mate is mapped
26+
samtools view -u -f 4 -F264 $in_bam > $out_dir/tmp/tmp1.bam &
27+
#mapped read whose mate is unmapped
28+
samtools view -u -f 8 -F260 $in_bam > $out_dir/tmp/tmp2.bam &
29+
#Both reads unmapped
30+
samtools view -u -f 12 -F256 $in_bam > $out_dir/tmp/tmp3.bam &
31+
# wait till all background processes finish
32+
wait
33+
# merge reads
34+
echo "Merging unmapped reads......"
35+
samtools merge -u - $out_dir/tmp/tmp[123].bam | samtools sort -n - $out_dir/tmp/unmapped
36+
#create fastq
37+
echo "Creating fastq input for GOTTCHA script......"
38+
bedtools bamtofastq -i $out_dir/tmp/unmapped.bam -fq $out_dir/tmp/unmapped.fastq
39+
40+
echo " Running GOTTCHA script....."
41+
42+
/software/CGP/external-apps/GOTTCHA/bin/gottcha.pl \
43+
--threads $num_threads \
44+
--mode all \
45+
--minQ 10 \
46+
--outdir $out_dir \
47+
--input $out_dir/tmp/unmapped.fastq \
48+
--database /lustre/scratch112/sanger/cgppipe/canpipe/test/ref/human/38/gottcha_db/database/GOTTCHA_VIRUSES_c5900_k24_u30_xHUMAN3x.strain
49+
50+
echo "Completed virus detection using GOTTCHA....."
51+
echo "Check result file unmapped.gottcha.tsv .... "

0 commit comments

Comments
 (0)