Skip to content

Commit 3ed63db

Browse files
2 parents e40acb2 + efef168 commit 3ed63db

File tree

2 files changed

+74
-30
lines changed

2 files changed

+74
-30
lines changed

hcluster_matrix.sh

+38-9
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,9 @@
1212

1313

1414
progname=${0##*/}
15-
VERSION='0.4_7Sep17' # v0.4_7Sep17; added options -x <regex> to select specific rows (genomes)
15+
VERSION='0.5_14Oct17' #v'0.5_14Oct17'; added options -A and -X to control the angle
16+
# and character eXpansion factor of leaf labels
17+
#'0.4_7Sep17' # v0.4_7Sep17; added options -x <regex> to select specific rows (genomes)
1618
# from the input pangenome_matrix_t0.tab
1719
# -c <0|1> to print or not distances in heatmap cells
1820
# -f <int> maximum number of decimals in matrix display (if -c 1)
@@ -42,7 +44,8 @@ function print_help()
4244
OPTIONAL:
4345
-a <string> algorithm/method for clustering
4446
[ward.D|ward.D2|single|complete|average(=UPGMA)] [def $algorithm]
45-
-c <int> 1|0 to display or not the distace values in the heatmap cells [def:$cell_note]
47+
-c <int> 1|0 to display or not the distance values [def:$cell_note]
48+
in the heatmap cells
4649
-d <string> distance type [euclidean|manhattan|gower] [def $distance]
4750
-f <int> maximum number of decimals in matrix display [1,2; def:$decimals]
4851
-t <string> text for Main title [def:$text]
@@ -54,14 +57,19 @@ function print_help()
5457
-W <integer> output device width [def:$width]
5558
-N <flag> print Notes and exit [flag]
5659
57-
Select genomes from input pangenome_matrix_t0.tab using regular expressions
60+
-A <'integer,integer'> angle to rotate row,col labels [def $angle]
61+
-X <float> leaf label character expansion factor [def $charExp]
62+
63+
64+
Select genomes from input pangenome_matrix_t0.tab using regular expressions:
5865
-x <string> regex, like: 'Escherichia|Salmonella' [def $regex]
66+
5967
6068
EXAMPLE:
61-
$progname -i pangenome_matrix_t0.tab -t "Pan-genome tree for Genus X" -a complete -d manhattan -o pdf -x 'maltoph|genosp'
69+
$progname -i pangenome_matrix_t0.tab -t "Pan-genome tree" -a ward.D2 -d euclidean -o pdf -x 'maltoph|genosp' -A 'NULL,45' -X 0.8
6270
6371
AIM: compute a distance matrix from a pangenome_matrix.tab file produced after running
64-
get_homologues.pl and compare_clusters.pl with options -t 0 -m .
72+
get_homologues.pl and compare_clusters.pl with options -t 0 -m .
6573
The pangenome_matrix.tab file is processed by hclust() and heatmap.2()
6674
6775
OUTPUT: a newick file with extension .ph and svg|pdf output of hclust and heatmap.2 calls
@@ -186,16 +194,22 @@ algorithm=ward.D2
186194
distance=gower
187195
decimals=2
188196

197+
charExp=1.0
198+
angle='NULL,NULL'
199+
#colTax=1
200+
189201
subset_matrix=0
190202

191203

192204
# See bash cookbook 13.1 and 13.2
193-
while getopts ':a:c:i:d:t:m:o:p:f:v:x:H:W:R:hND?:' OPTIONS
205+
while getopts ':a:A:c:i:d:t:m:o:p:f:v:x:X:H:W:R:hND?:' OPTIONS
194206
do
195207
case $OPTIONS in
196208

197209
a) algorithm=$OPTARG
198210
;;
211+
A) angle=$OPTARG
212+
;;
199213
c) cell_note=$OPTARG
200214
;;
201215
d) distance=$OPTARG
@@ -216,6 +230,8 @@ do
216230
;;
217231
x) regex=$OPTARG
218232
;;
233+
X) charExp=$OPTARG
234+
;;
219235
C) reorder_clusters=0
220236
;;
221237
H) height=$OPTARG
@@ -294,6 +310,7 @@ cat << PARAMS
294310
distance:$distance|dist_cutoff:$dist_cutoff|hclustering_meth:$algorithm|cell_note:$cell_note
295311
text:$text|margin_hor:$margin_hor|margin_vert:$margin_vert|points:$points
296312
width:$width|height:$height|outformat:$outformat
313+
angle:"$angle"|charExp:$charExp
297314
##############################################################################################
298315
299316
PARAMS
@@ -307,6 +324,9 @@ tree_file=${tree_file//\//_}
307324
newick_file="hclust_${distance}-${algorithm}_${tab_file%.*}_tree.ph"
308325
newick_file=${newick_file//\//_}
309326

327+
aRow=$(echo "$angle" | cut -d, -f1)
328+
aCol=$(echo "$angle" | cut -d, -f2)
329+
310330
echo ">>> Plotting files $tree_file and $heatmap_outfile ..."
311331
echo " this will take some time, please be patient"
312332
echo
@@ -330,6 +350,8 @@ if($subset_matrix > 0 ){
330350
table <- table[include_list, ]
331351
}
332352
353+
354+
333355
mat_dat <- data.matrix(table[,2:ncol(table)])
334356
335357
rnames <- table[,1]
@@ -348,14 +370,21 @@ dev.off()
348370
349371
if($cell_note == 0){
350372
$outformat(file="$heatmap_outfile", width=$width, height=$height, pointsize=$pointsize)
351-
heatmap.2(as.matrix(my_dist), main="$text $distance dist.", notecol="black", density.info="none", trace="none", dendrogram="row", margins=c($margin_vert,$margin_hor), lhei = c(1,5))
373+
heatmap.2(as.matrix(my_dist), main="$text $distance dist.", notecol="black", density.info="none", trace="none", dendrogram="row",
374+
margins=c($margin_vert,$margin_hor), lhei = c(1,5),
375+
cexRow=$charExp, cexCol=$charExp,
376+
srtRow=$aRow, srtCol=$aCol)
352377
dev.off()
353378
}
354379
355380
if($cell_note == 1){
356381
$outformat(file="$heatmap_outfile", width=$width, height=$height, pointsize=$pointsize)
357-
heatmap.2(as.matrix(my_dist), cellnote=round(as.matrix(my_dist),$decimals), main="$text $distance dist.", notecol="black", density.info="none", trace="none", dendrogram="row", margins=c($margin_vert,$margin_hor), lhei = c(1,5))
358-
dev.off()
382+
heatmap.2(as.matrix(my_dist), cellnote=round(as.matrix(my_dist),$decimals), main="$text $distance dist.",
383+
notecol="black", density.info="none", trace="none", dendrogram="row",
384+
margins=c($margin_vert,$margin_hor), lhei = c(1,5),
385+
cexRow=$charExp, cexCol=$charExp,
386+
srtRow=$aRow, srtCol=$aCol)
387+
dev.off()
359388
}
360389
361390
RCMD

plot_matrix_heatmap.sh

+36-21
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@
99
#: OUTPUT: svg and pdf; png not implemented yet
1010

1111
progname=${0##*/} # plot_matrix_heatmap.sh
12-
VERSION='v0.5_18Aug17' # added options -d (max no. decimals) and -x (filter matrix with regex)
12+
VERSION='v0.6_14Oct17' # added options -X (charExp) and -a (label rotation angle)
13+
#'v0.5_18Aug17' # added options -d (max no. decimals) and -x (filter matrix with regex)
1314
#'v0.4_17Aug17' # added option -r to remove column names and cell contents, and -k
1415
#'v0.3_13Apr16' # added option -c to filter input matrix by a maximum similarity cut-off value
1516
# to reduce excessive redundancy. Improved the help text printed with -M
@@ -41,16 +42,18 @@ function print_help()
4142
-d <int> maximum number of decimals in matrix display [0-2; def: $decimals]
4243
4344
II) tweak the graphical output:
44-
-t <string> text for plot title [def input_tab_file_name]
45-
-m <integer> margins_horizontal [def $margin_hor]
46-
-v <integer> margins_vertical [def $margin_vert]
47-
-o <string> output file format [def $outformat]
48-
-p <integer> points for plotting device [def $points]
49-
-H <integer> output device height [def $height]
50-
-W <integer> output device width [def $width]
51-
-C <flag> do not reorder clusters [def reorder clusters and plot dendrogram]
52-
-r <flag> remove column names and cell contents [def names and cell contents are printed]
53-
-k <string> text for scale X-axis [def "Value"]
45+
-a <'integer,integer'> angle to rotate row,col labels [def $angle]
46+
-t <string> text for plot title [def input_tab_file_name]
47+
-m <integer> margins_horizontal [def $margin_hor]
48+
-v <integer> margins_vertical [def $margin_vert]
49+
-o <string> output file format [def $outformat]
50+
-p <integer> points for plotting device [def $points]
51+
-H <integer> output device height [def $height]
52+
-W <integer> output device width [def $width]
53+
-C <flag> do not reorder clusters [def reorder clusters and plot dendrogram]
54+
-r <flag> remove column names and cell contents [def names and cell contents are printed]
55+
-k <string> text for scale X-axis [def "Value"]
56+
-X <float> character expansion factor [def $charExp]
5457
5558
RUN NJ ANALYSIS using ANI matrix (average identity matrix) generated by get_homologues.pl -A -t 0 -M|G
5659
-N <flag> will compute a distance matrix from the input similarity matrix
@@ -64,7 +67,7 @@ function print_help()
6467
-M <flag> prints gplots installation instructions and further usage information
6568
6669
EXAMPLE:
67-
$progname -i Avg_identity.tab -c 98.5 -t "Genus X ANIb (OMCL all clusters)" -N -o pdf -m 22 -v 22 -p 20 -H 20 -W 30 -x 'Smalt|Smc'
70+
$progname -i Avg_identity.tab -c 99.1 -t "ANIb (OMCL core-clusters)" -N -o pdf -m 22 -v 22 -p 20 -H 20 -W 30 -x 'Smalt|Smc' -d 1 -a 'NULL,45' -X 0.9
6871
6972
#------------------------------------------------------------------------------------------------------------------
7073
AIM: Plot ordered heatmaps with row and col. dendrogram, from squared numeric (distance or presence-absence) matrix,
@@ -179,14 +182,18 @@ reorder_clusters=1
179182
remove_colnames=0
180183
key_xaxis="Value"
181184
decimals=0
185+
charExp=1.0
186+
angle='NULL,NULL'
182187

183188
subset_matrix=0
184189

185190
# See bash cookbook 13.1 and 13.2
186-
while getopts ':c:i:d:t:m:o:p:v:x:H:W:k:hrMNC?:' OPTIONS
191+
while getopts ':a:c:i:d:t:m:o:p:v:x:X:H:W:k:hrMNC?:' OPTIONS
187192
do
188193
case $OPTIONS in
189194

195+
a) angle=$OPTARG
196+
;;
190197
c) sim_cutoff=$OPTARG
191198
;;
192199
d) decimals=$OPTARG
@@ -217,6 +224,8 @@ do
217224
;;
218225
x) regex=$OPTARG
219226
;;
227+
X) charExp=$OPTARG
228+
;;
220229
C) reorder_clusters=0
221230
;;
222231
\:) printf "argument missing from -%s option\n" $OPTARG
@@ -270,6 +279,7 @@ cat << PARAMS
270279
input tab_file = $tab_file | sim_cutoff = $sim_cutoff | max_decimals = $decimals
271280
subset_matrix = $subset_matrix | regex = $regex
272281
text=$text|margin_hor=$margin_hor|margin_vert=$margin_vert|points=$points
282+
angle=$angle|charExp=$charExp
273283
width=$width|height=$height|outformat=$outformat
274284
reorder_clusters=$reorder_clusters|remove_colnames=$remove_colnames|key_xaxis=$key_xaxis|do_bioNJ=$do_nj
275285
@@ -290,6 +300,11 @@ else
290300
nj_tree="${tab_file%.*}_BioNJ.ph"
291301
fi
292302

303+
aRow=$(echo "$angle" | cut -d, -f1)
304+
aCol=$(echo "$angle" | cut -d, -f2)
305+
306+
307+
293308
# 2) call R using a heredoc and write the resulting script to file
294309
R --no-save -q <<RCMD > ${progname%.*}_script_run_at_${start_time}.R
295310
library("gplots")
@@ -307,12 +322,8 @@ if($subset_matrix > 0 ){
307322
include_list <- grep("$regex", rownames(mat_dat))
308323
mat_dat <- mat_dat[include_list, ]
309324
mat_dat <- mat_dat[,include_list]
310-
#mat_dat <- f.mat2
311-
#rm(f.mat)
312325
}
313326
314-
315-
316327
if($sim_cutoff < 100)
317328
{
318329
tmp_mat = mat_dat
@@ -325,26 +336,30 @@ if($reorder_clusters > 0){
325336
if($remove_colnames > 0){
326337
$outformat("$heatmap_outfile", width=$width, height=$height, pointsize=$pointsize)
327338
heatmap.2(mat_dat, main="$text", notecol="black", density.info="none", key.xlab="$key_xaxis",
328-
trace="none", margins=c($margin_vert,$margin_hor), lhei = c(1,5), labCol=F)
339+
trace="none", margins=c($margin_vert,$margin_hor), lhei = c(1,5), labCol=F,
340+
cexRow=$charExp, cexCol=$charExp, srtRow=$aRow, srtCol=$aCol)
329341
dev.off()
330342
}
331343
else {
332344
$outformat("$heatmap_outfile", width=$width, height=$height, pointsize=$pointsize)
333345
heatmap.2(mat_dat, cellnote=mat_dat, main="$text", notecol="black", density.info="none", key.xlab="$key_xaxis",
334-
trace="none", margins=c($margin_vert,$margin_hor), lhei = c(1,5))
346+
trace="none", margins=c($margin_vert,$margin_hor), lhei = c(1,5),
347+
cexRow=$charExp, cexCol=$charExp, srtRow=$aRow, srtCol=$aCol)
335348
dev.off()
336349
}
337350
} else {
338351
if($remove_colnames > 0){
339352
$outformat("$heatmap_outfile", width=$width, height=$height, pointsize=$pointsize)
340353
heatmap.2(mat_dat, main="$text", notecol="black", density.info="none", labCol=F, key.xlab="$key_xaxis",
341-
trace="none", margins=c($margin_vert,$margin_hor), lhei = c(1,5), dendrogram = "row", Colv = FALSE)
354+
trace="none", margins=c($margin_vert,$margin_hor), lhei = c(1,5), dendrogram = "row", Colv = FALSE,
355+
cexRow=$charExp, cexCol=$charExp, srtRow=$aRow, srtCol=$aCol)
342356
dev.off()
343357
}
344358
else {
345359
$outformat("$heatmap_outfile", width=$width, height=$height, pointsize=$pointsize)
346360
heatmap.2(mat_dat, cellnote=mat_dat, main="$text", notecol="black", density.info="none", key.xlab="$key_xaxis",
347-
trace="none", margins=c($margin_vert,$margin_hor), lhei = c(1,5), dendrogram = "row", Colv = FALSE)
361+
trace="none", margins=c($margin_vert,$margin_hor), lhei = c(1,5), dendrogram = "row", Colv = FALSE,
362+
cexRow=$charExp, cexCol=$charExp, srtRow=$aRow, srtCol=$aCol)
348363
dev.off()
349364
}
350365
}

0 commit comments

Comments
 (0)