-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathFinal_PopGenome_Figures.R
3707 lines (2874 loc) · 277 KB
/
Final_PopGenome_Figures.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
if(!require(RColorBrewer)) {
install.packages("RColorBrewer",repos = "http://cran.us.r-project.org")
}
library("RColorBrewer")
if(!require(gplots)) {
install.packages("gplots",repos = "http://cran.us.r-project.org")
}
library("gplots")
if(!require(png)) {
install.packages("png",repos = "http://cran.us.r-project.org")
}
library(png)
if(!require(grid)) {
install.packages("grid",repos = "http://cran.us.r-project.org")
}
library(grid)
if(!require(scatterplot3d)) {
install.packages("scatterplot3d",repos = "http://cran.us.r-project.org")
}
library(scatterplot3d)
if(!require(plotly)) {
install.packages("plotly",repos = "http://cran.us.r-project.org")
}
library(plotly)
if(!require(ggplot2)) {
install.packages("ggplot2",repos = "http://cran.us.r-project.org")
}
library("ggplot2")
if(!require(jpeg)) {
install.packages("jpeg",repos = "http://cran.us.r-project.org")
}
library(jpeg)
if(!require(data.table)) {
install.packages("data.table",repos = "http://cran.us.r-project.org")
}
library(data.table)
if(!require(ggpubr)) {
install.packages("ggpubr",repos = "http://cran.us.r-project.org")
}
library(ggpubr)
if(!require(ggsignif)) {
install.packages("ggsignif",repos = "http://cran.us.r-project.org")
}
library(ggsignif)
#if(!require(Biostrings)) {
# install.packages("Biostrings",repos = "http://cran.us.r-project.org")
#}
#library("Biostrings")
if(!require(ape)) {
install.packages("ape",repos = "http://cran.us.r-project.org")
}
library("ape")
if(!require(phytools)) {
install.packages("phytools",repos = "http://cran.us.r-project.org")
}
library("phytools")
if(!require(seqinr)) {
install.packages("seqinr",repos = "http://cran.us.r-project.org")
}
library(seqinr)
if(!require(ggplot2)) {
install.packages("ggplot2",repos = "http://cran.us.r-project.org")
}
library("ggplot2")
if(!require(jpeg)) {
install.packages("jpeg",repos = "http://cran.us.r-project.org")
}
library(jpeg)
if(!require(data.table)) {
install.packages("data.table",repos = "http://cran.us.r-project.org")
}
library(data.table)
if(!require(ggpubr)) {
install.packages("ggpubr",repos = "http://cran.us.r-project.org")
}
library(ggpubr)
if(!require(ggsignif)) {
install.packages("ggsignif",repos = "http://cran.us.r-project.org")
}
if(!require(ggtree)) {
install.packages("ggtree",repos = "http://cran.us.r-project.org")
}
library(ggtree)
library("ape")
library("PopGenome")
library("pegas")
library("tidyverse")
library(ggsignif)
library(gggenes)
library(gridExtra)
library(grid)
library(ggplot2)
library(lattice)
library(scales)
BmFileList<-list.files("/Volumes/projects-t3/EBMAL/SNP_MALE_GATK_Best_Practices/Masked_SNP_Tables_All/BMalayi",pattern=".tsv")
BpFileList<-list.files("/Volumes/projects-t3/EBMAL/SNP_MALE_GATK_Best_Practices/Masked_SNP_Tables_All/BPahangi",pattern=".tsv")
WbFileList<-list.files("/Volumes/projects-t3/EBMAL/SNP_MALE_GATK_Best_Practices/Masked_SNP_Tables_All/WBancrofti",pattern=".tsv")
BtFileList<-list.files("/Volumes/projects-t3/EBMAL/SNP_MALE_GATK_Best_Practices/Masked_SNP_Tables_All/BTimori",pattern=".tsv")
#BpFileList<-FileList[grep("Bp",FileList)]
#BmFileList<-FileList[grep("Bp",FileList,invert = T)]
Bp.res<-as.data.frame(read.delim("/Volumes/projects-t3/EBMAL/SNP_MALE_GATK_Best_Practices/Masked_SNP_Tables_All/BPahangi.res.txt",header = FALSE,stringsAsFactors = FALSE))
Bm.res<-as.data.frame(read.delim("/Volumes/projects-t3/EBMAL/SNP_MALE_GATK_Best_Practices/Masked_SNP_Tables_All/BMalayi.res.txt",header = FALSE,stringsAsFactors = FALSE))
Wb.res<-as.data.frame(read.delim("/Volumes/projects-t3/EBMAL/SNP_MALE_GATK_Best_Practices/Masked_SNP_Tables_All/WBancrofti.res.txt",header = FALSE,stringsAsFactors = FALSE))
BT.res<-as.data.frame(read.delim("/Volumes/projects-t3/EBMAL/SNP_MALE_GATK_Best_Practices/Masked_SNP_Tables_All/b.timori.res.txt",header = FALSE,stringsAsFactors = FALSE))
Bp.chr.res<-Bp.res[grep("Chr",Bp.res$V1),1:2]
Bm.chr.res<-Bm.res[grep("Chr",Bm.res$V1),1:2]
Wb.res<-Wb.res[,1:2]
BT.res<-BT.res[,1:2]
###Wuch Chromosome Contigs
BMRelevants<-Bm.chr.res$V1
if(1 == 2){
Matches<-data.frame()
pdf("/Users/jmattick/Documents/Wuch_vs_BMRef_ChrMatches_NEW.pdf")
for (a in 1:length(BMRelevants))
{
##Get BP Contigs Attached to BM via Promer
sub_Bm_vs_newBP<-Bm_vs_newBP[Bm_vs_newBP$V9 == BMRelevants[a],]
chrLength<-as.numeric(unique(sub_Bm_vs_newBP$V7))
contigs<-unique(sub_Bm_vs_newBP$V10)
HighLengthContigs<-data.frame(matrix(ncol=2,nrow=length(contigs)),stringsAsFactors = FALSE)
colnames(HighLengthContigs)<-c("Contig","Length")
for (b in 1:length(contigs))
{
totallen<-sum(sub_Bm_vs_newBP[sub_Bm_vs_newBP$V10 == contigs[b],]$V6)
HighLengthContigs[b,1]<-contigs[b]
HighLengthContigs[b,2]<-totallen
}
HighLengthContigs$Length<-HighLengthContigs$Length/chrLength
HighLengthContigsFiltered<-HighLengthContigs[HighLengthContigs$Length > 0.001,]
sumcovered<-round(sum(HighLengthContigsFiltered$Length)*100,digits=1)
##Subset Data Frames
HighLengthContigsFiltered<-HighLengthContigs
signif_sub_Bm_vs_newBP<-sub_Bm_vs_newBP[sub_Bm_vs_newBP$V10 %in% HighLengthContigsFiltered$Contig,]
colnames(signif_sub_Bm_vs_newBP)<-c("BMStart","BMEnd","BPStart","BPEnd","BMLength","BPLength","BMSize","BPSize","BMContig","BPContig")
if(length(signif_sub_Bm_vs_newBP$BMStart) > 0){
print(ggplot() + geom_segment(data=signif_sub_Bm_vs_newBP,mapping=aes(x=BMStart,xend=BMEnd,y=BPStart,yend=BPEnd)) + ggtitle(paste("Promer match for ",BMRelevants[a]," vs Wuch Contigs\nCovering ",sumcovered,"%",sep="")) + theme_bw()+theme(plot.title = element_text(size=14))+labs(x=BMRelevants[a],y="WuchContigs"))
MatchesSpecific<-HighLengthContigsFiltered
MatchesSpecific$BMChr<-BMRelevants[a]
Matches<-rbind(Matches,MatchesSpecific)
#+ facet_grid(V10~.,scales="free",space="free")
#,color=BPContig
# + facet_grid(BPContig~.,scales="free",space="free")
}
}
dev.off()
}
####Wuch Contig IDs...
Bm_vs_Wuch <- as.data.frame(read.delim("/Users/jmattick/Documents/NucmerBmtoWuch.coords", sep="\t", header=FALSE,stringsAsFactors = F))
WuchAssignments<-data.frame(matrix(ncol=3,nrow=length(Wb.res$V1)),stringsAsFactors = FALSE)
colnames(WuchAssignments)<-c("Contig","Chr","Start")
for (a in 1:length(Wb.res$V1))
{
subBm_vs_Wuch<-Bm_vs_Wuch[Bm_vs_Wuch$V10 == as.character(Wb.res[a,]$V1),]
if(length(subBm_vs_Wuch$V1) > 0) {
ChrList<-unique(subBm_vs_Wuch$V9)
PickingFrame<-data.frame(matrix(ncol=3,nrow=length(ChrList)),stringsAsFactors = FALSE)
colnames(PickingFrame)<-c("Chr","Sum","Start")
for (b in 1:length(ChrList))
{
sumvalue<-sum(subBm_vs_Wuch[subBm_vs_Wuch$V9==ChrList[b],]$V5)
subBm_vs_Wuch<-subBm_vs_Wuch[order(subBm_vs_Wuch$V5,decreasing = TRUE),]
startvalue<-as.numeric(subBm_vs_Wuch[1,]$V1)
PickingFrame[b,1]<-ChrList[b]
PickingFrame[b,2]<-sumvalue
PickingFrame[b,3]<-startvalue
}
PickingFrame<-PickingFrame[order(PickingFrame$Sum,decreasing = TRUE),]
WuchAssignments[a,1]<-as.character(Wb.res[a,]$V1)
WuchAssignments[a,2]<-PickingFrame[1,]$Chr
WuchAssignments[a,3]<-PickingFrame[1,]$Start
} else {
WuchAssignments[a,1]<-as.character(Wb.res[a,]$V1)
WuchAssignments[a,2]<-"None"
WuchAssignments[a,3]<-"NA"
}
}
WuchAssignments_Chrs<-WuchAssignments[grep("Chr",WuchAssignments$Chr),]
Bm_vs_Tim <- as.data.frame(read.delim("/Users/jmattick/Documents/BMvsBTCURRENT.coords", sep="\t", header=FALSE,stringsAsFactors = F))
TimAssignments<-data.frame(matrix(ncol=3,nrow=length(BT.res$V1)),stringsAsFactors = FALSE)
colnames(TimAssignments)<-c("Contig","Chr","Start")
for (a in 1:length(BT.res$V1))
{
subBm_vs_Tim<-Bm_vs_Tim[Bm_vs_Tim$V10 == as.character(BT.res[a,]$V1),]
if(length(subBm_vs_Tim$V1) > 0) {
ChrList<-unique(subBm_vs_Tim$V9)
PickingFrame<-data.frame(matrix(ncol=3,nrow=length(ChrList)),stringsAsFactors = FALSE)
colnames(PickingFrame)<-c("Chr","Sum","Start")
for (b in 1:length(ChrList))
{
sumvalue<-sum(subBm_vs_Tim[subBm_vs_Tim$V9==ChrList[b],]$V5)
subBm_vs_Tim<-subBm_vs_Tim[order(subBm_vs_Tim$V5,decreasing = TRUE),]
startvalue<-as.numeric(subBm_vs_Tim[1,]$V1)
PickingFrame[b,1]<-ChrList[b]
PickingFrame[b,2]<-sumvalue
PickingFrame[b,3]<-startvalue
}
PickingFrame<-PickingFrame[order(PickingFrame$Sum,decreasing = TRUE),]
TimAssignments[a,1]<-as.character(BT.res[a,]$V1)
TimAssignments[a,2]<-PickingFrame[1,]$Chr
TimAssignments[a,3]<-PickingFrame[1,]$Start
} else {
TimAssignments[a,1]<-as.character(BT.res[a,]$V1)
TimAssignments[a,2]<-"None"
TimAssignments[a,3]<-"NA"
}
}
TimAssignments_Chrs<-TimAssignments[grep("Chr",TimAssignments$Chr),]
#pdf("/Users/jmattick/Documents/Bruga.malayi.pahangi.pi.pdf",width = 6,height = 15)
#pdf("/Users/jmattick/Documents/Pop.Genome.Figure2.pdf",width = 9,height = 15,useDingbats=FALSE)
pdf("/Users/jmattick/Documents/BPahangiAll.pdf",width = 9,height = 15,useDingbats=FALSE)
#pdf("/Users/jmattick/Documents/BPahangiAll_het.pdf",width = 9,height = 15,useDingbats=FALSE)
TotalMaxPreCalc<-0.02452273
###BP Plotting
#BPGroups<-c("MultiAF","Clinical","FR3","AM")
BPGroups<-c("All")
TotalMax<-c()
for (a in 1:length(BPGroups))
{
Graphing<-data.frame()
if(BPGroups[a] == "MultiAF"){
subBPGroups<-BpFileList[1]
}else if(BPGroups[a] == "Clinical"){
subBPGroups<-BpFileList[2:4]
}else if(BPGroups[a] == "FR3"){
subBPGroups<-BpFileList[10:13]
}else if(BPGroups[a] == "AM"){
subBPGroups<-BpFileList[7:9]
}else if(BPGroups[a] == "All"){
subBPGroups<-BpFileList[c(1:4,7:13)]
}
TotalSNPs<-data.frame()
SampleFactor<-length(subBPGroups)
for (b in 1:SampleFactor)
{
TempSNP<-as.data.frame(read.delim(paste("/Volumes/projects-t3/EBMAL/SNP_MALE_GATK_Best_Practices/Masked_SNP_Tables_All/BPahangi/",subBPGroups[b],sep=""), sep="\t", header=FALSE,stringsAsFactors = FALSE))
colnames(TempSNP)<-c("Chr","Pos","Zyg")
TempSNP<-TempSNP[grep("Chr",TempSNP$Chr),]
TotalSNPs<-rbind(TempSNP,TotalSNPs)
}
#TotalSNPs<-TotalSNPs[TotalSNPs$Zyg == "het",]
ChrList<-Bm.chr.res$V1
for (b in 1:length(ChrList))
{
ChrName<-strsplit(Bm.chr.res[b,]$V1,"_")[[1]][3]
subChromosomes<-unique(TotalSNPs[grep(ChrName,TotalSNPs$Chr),]$Chr)
subChromosomes<-subChromosomes[order(subChromosomes)]
for (c in 1:length(subChromosomes))
{
ChrSize<-as.numeric(Bp.res[Bp.res$V1 == subChromosomes[c],]$V2)
subSNPs<-TotalSNPs[TotalSNPs$Chr == subChromosomes[c],]
PartialGraphing<-data.frame(matrix(ncol=4,nrow=ceiling(ChrSize/10000)),stringsAsFactors = FALSE)
colnames(PartialGraphing)<-c("Position","SNVDensity","Chr","subChr")
for (d in 1:ceiling(ChrSize/10000))
{
if(d<ceiling(ChrSize/10000)){
SNVDensity<-(length(subSNPs[subSNPs$Pos >= ((d*10000)-9999) & subSNPs$Pos <= (d*10000) & subSNPs$Zyg == "het",]$Pos)+(2*length(subSNPs[subSNPs$Pos >= ((d*10000)-9999) & subSNPs$Pos <= (d*10000) & subSNPs$Zyg == "hom",]$Pos)))/(20000*SampleFactor)
PartialGraphing[d,]$Position<-(d*10000)
PartialGraphing[d,]$SNVDensity<-SNVDensity
PartialGraphing[d,]$Chr<-ChrName
PartialGraphing[d,]$subChr<-subChromosomes[c]
} else {
SNVDensity<-(length(subSNPs[subSNPs$Pos >= ((d*10000)-9999) & subSNPs$Pos <= ChrSize & subSNPs$Zyg == "het",]$Pos)+(2*length(subSNPs[subSNPs$Pos >= ((d*10000)-9999) & subSNPs$Pos <= (d*10000) & subSNPs$Zyg == "hom",]$Pos)))/((ChrSize-((d*10000)-10000))*SampleFactor)
PartialGraphing[d,]$Position<-ChrSize
PartialGraphing[d,]$SNVDensity<-SNVDensity
PartialGraphing[d,]$Chr<-ChrName
PartialGraphing[d,]$subChr<-subChromosomes[c]
}
}
Graphing<-rbind(Graphing,PartialGraphing)
}
}
SNVDensity_ymax<-max(Graphing$SNVDensity)
TotalMax<-c(TotalMax,SNVDensity_ymax)
Graphing$Position<-Graphing$Position/1000000
g1<-ggplot(Graphing[Graphing$Chr == "Chr1",]) + geom_point(aes(x=Position,y=SNVDensity),size=0.1) + theme_bw() + facet_grid(~subChr,scales="free",space="free") + ggtitle("A") + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Position (Mb)")+scale_x_continuous(breaks = trans_breaks(identity, identity, n = 3),expand = c(0,0))+scale_y_continuous(expand = c(0,0),limits=c(0,TotalMaxPreCalc))
g2<-ggplot(Graphing[Graphing$Chr == "Chr2",]) + geom_point(aes(x=Position,y=SNVDensity),size=0.1) + theme_bw() + facet_grid(~subChr,scales="free",space="free") + ggtitle("B") + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Position (Mb)")+scale_x_continuous(breaks = trans_breaks(identity, identity, n = 3),expand = c(0,0))+scale_y_continuous(expand = c(0,0),limits=c(0,TotalMaxPreCalc))
g3<-ggplot(Graphing[Graphing$Chr == "Chr3",]) + geom_point(aes(x=Position,y=SNVDensity),size=0.1) + theme_bw() + facet_grid(~subChr,scales="free",space="free") + ggtitle("C") + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Position (Mb)")+scale_x_continuous(breaks = trans_breaks(identity, identity, n = 3),expand = c(0,0))+scale_y_continuous(expand = c(0,0),limits=c(0,TotalMaxPreCalc))
g4<-ggplot(Graphing[Graphing$Chr == "Chr4",]) + geom_point(aes(x=Position,y=SNVDensity),size=0.1) + theme_bw() + facet_grid(~subChr,scales="free",space="free") + ggtitle("D") + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Position (Mb)")+scale_x_continuous(breaks = trans_breaks(identity, identity, n = 3),expand = c(0,0))+scale_y_continuous(expand = c(0,0),limits=c(0,TotalMaxPreCalc))
g5<-ggplot(Graphing[Graphing$Chr == "ChrX",]) + geom_point(aes(x=Position,y=SNVDensity),size=0.1) + theme_bw() + facet_grid(~subChr,scales="free",space="free") + ggtitle("E") + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Position (Mb)")+scale_x_continuous(breaks = trans_breaks(identity, identity, n = 3),expand = c(0,0))+scale_y_continuous(expand = c(0,0),limits=c(0,TotalMaxPreCalc))
#print(ggarrange(g1,g2,g3,g4,g5,ncol=1,nrow=5,common.legend = TRUE,legend = "bottom"))
gl<-list(g1,g2,g3,g4,g5)
print(grid.arrange(grobs = gl, widths = c(5,5),layout_matrix = rbind(c(1,2),c(3,4),c(5,5))))
BPahangiGraphing<-Graphing
}
dev.off()
g1<-ggplot(Graphing[Graphing$Chr == "Chr1",]) + geom_point(aes(x=Position,y=Pi),size=1) + theme_bw() + facet_grid(~subChr,scales="free",space="free") + ggtitle("Chr1") + theme(legend.position="none",plot.title = element_text(size=24,face = "bold"),text = element_text(size=20),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Position (Mb)")+scale_x_continuous(breaks = trans_breaks(identity, identity, n = 3),expand = c(0,0))+scale_y_continuous(expand = c(0,0),limits=c(0,TotalMaxPreCalc))
g2<-ggplot(Graphing[Graphing$Chr == "Chr2",]) + geom_point(aes(x=Position,y=Pi),size=1) + theme_bw() + facet_grid(~subChr,scales="free",space="free") + ggtitle("Chr2") + theme(legend.position="none",plot.title = element_text(size=24,face = "bold"),text = element_text(size=20),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Position (Mb)")+scale_x_continuous(breaks = trans_breaks(identity, identity, n = 3),expand = c(0,0))+scale_y_continuous(expand = c(0,0),limits=c(0,TotalMaxPreCalc))
g3<-ggplot(Graphing[Graphing$Chr == "Chr3",]) + geom_point(aes(x=Position,y=Pi),size=1) + theme_bw() + facet_grid(~subChr,scales="free",space="free") + ggtitle("Chr3") + theme(legend.position="none",plot.title = element_text(size=24,face = "bold"),text = element_text(size=20),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Position (Mb)")+scale_x_continuous(breaks = trans_breaks(identity, identity, n = 3),expand = c(0,0))+scale_y_continuous(expand = c(0,0),limits=c(0,TotalMaxPreCalc))
g4<-ggplot(Graphing[Graphing$Chr == "Chr4",]) + geom_point(aes(x=Position,y=Pi),size=1) + theme_bw() + facet_grid(~subChr,scales="free",space="free") + ggtitle("Chr4") + theme(legend.position="none",plot.title = element_text(size=24,face = "bold"),text = element_text(size=20),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Position (Mb)")+scale_x_continuous(breaks = trans_breaks(identity, identity, n = 3),expand = c(0,0))+scale_y_continuous(expand = c(0,0),limits=c(0,TotalMaxPreCalc))
g5<-ggplot(Graphing[Graphing$Chr == "ChrX",]) + geom_point(aes(x=Position,y=Pi),size=1) + theme_bw() + facet_grid(~subChr,scales="free",space="free") + ggtitle("ChrX") + theme(legend.position="none",plot.title = element_text(size=24,face = "bold"),text = element_text(size=20),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Position (Mb)")+scale_x_continuous(breaks = trans_breaks(identity, identity, n = 3),expand = c(0,0))+scale_y_continuous(expand = c(0,0),limits=c(0,TotalMaxPreCalc))
pdf("/Users/jmattick/Documents/BPChr1.pdf",width = 20,height = 10,useDingbats=FALSE)
print(g1)
dev.off()
pdf("/Users/jmattick/Documents/BPChr2.pdf",width = 20,height = 10,useDingbats=FALSE)
print(g2)
dev.off()
pdf("/Users/jmattick/Documents/BPChr3.pdf",width = 20,height = 10,useDingbats=FALSE)
print(g3)
dev.off()
pdf("/Users/jmattick/Documents/BPChr4.pdf",width = 20,height = 10,useDingbats=FALSE)
print(g4)
dev.off()
#pdf("/Users/jmattick/Documents/BPChrX.pdf",width = 20,height = 10,useDingbats=FALSE)
pdf("/Users/jmattick/Documents/BPChrX_het.pdf",width = 20,height = 10,useDingbats=FALSE)
print(g5)
dev.off()
###BM Plotting
#BMGroups<-c("Lucknow","Malaysia","FR3","TRS","WashU","Liverpool")
#pdf("/Users/jmattick/Documents/Pop.Genome.Figure1.pdf",width = 9,height = 15,useDingbats=FALSE)
pdf("/Users/jmattick/Documents/BMalayi_all.pdf",width = 9,height = 15,useDingbats=FALSE)
#pdf("/Users/jmattick/Documents/BMalayi_subsetted.pdf",width = 9,height = 15,useDingbats=FALSE)
BMGroups<-c("All")
#BMGroups<-c("Lucknow","Malaysia","FR3","TRS","WashU","Liverpool")
#BMGroups<-c("Malaysia","Malaysia_Masked")
TotalMaxPreCalc<-0.02
TotalMax<-0
for (a in 1:length(BMGroups))
{
Graphing<-data.frame()
if(BMGroups[a] == "Lucknow"){
subBMGroups<-BmFileList[1:4]
}else if(BMGroups[a] == "Malaysia"){
subBMGroups<-BmFileList[5:8]
}else if(BMGroups[a] == "FR3"){
subBMGroups<-BmFileList[9:12]
}else if(BMGroups[a] == "TRS"){
subBMGroups<-BmFileList[c(13:16)]
}else if(BMGroups[a] == "WashU"){
subBMGroups<-BmFileList[21:26]
}else if(BMGroups[a] == "Liverpool"){
subBMGroups<-BmFileList[17:20]
}else if(BMGroups[a] == "All"){
subBMGroups<-BmFileList
}
TotalSNPs<-data.frame()
SampleFactor<-length(subBMGroups)
for (b in 1:SampleFactor)
{
TempSNP<-as.data.frame(read.delim(paste("/Volumes/projects-t3/EBMAL/SNP_MALE_GATK_Best_Practices/Masked_SNP_Tables_All/BMalayi/",subBMGroups[b],sep=""), sep="\t", header=FALSE,stringsAsFactors = FALSE))
colnames(TempSNP)<-c("Chr","Pos","Zyg")
TempSNP<-TempSNP[grep("Chr",TempSNP$Chr),]
TotalSNPs<-rbind(TempSNP,TotalSNPs)
}
ChrList<-Bm.chr.res$V1
for (b in 1:length(ChrList))
{
ChrName<-strsplit(Bm.chr.res[b,]$V1,"_")[[1]][3]
subChromosomes<-unique(TotalSNPs[grep(ChrName,TotalSNPs$Chr),]$Chr)
subChromosomes<-subChromosomes[order(subChromosomes)]
for (c in 1:length(subChromosomes))
{
ChrSize<-as.numeric(Bm.res[Bm.res$V1 == subChromosomes[c],]$V2)
subSNPs<-TotalSNPs[TotalSNPs$Chr == subChromosomes[c],]
PartialGraphing<-data.frame(matrix(ncol=4,nrow=ceiling(ChrSize/10000)),stringsAsFactors = FALSE)
colnames(PartialGraphing)<-c("Position","SNVDensity","Chr","subChr")
for (d in 1:ceiling(ChrSize/10000))
{
if(d<ceiling(ChrSize/10000)){
SNVDensity<-(length(subSNPs[subSNPs$Pos >= ((d*10000)-9999) & subSNPs$Pos <= (d*10000) & subSNPs$Zyg == "het",]$Pos)+(2*length(subSNPs[subSNPs$Pos >= ((d*10000)-9999) & subSNPs$Pos <= (d*10000) & subSNPs$Zyg == "hom",]$Pos)))/(20000*SampleFactor)
PartialGraphing[d,]$Position<-(d*10000)
PartialGraphing[d,]$SNVDensity<-SNVDensity
PartialGraphing[d,]$Chr<-ChrName
PartialGraphing[d,]$subChr<-subChromosomes[c]
} else {
SNVDensity<-(length(subSNPs[subSNPs$Pos >= ((d*10000)-9999) & subSNPs$Pos <= ChrSize & subSNPs$Zyg == "het",]$Pos)+(2*length(subSNPs[subSNPs$Pos >= ((d*10000)-9999) & subSNPs$Pos <= (d*10000) & subSNPs$Zyg == "hom",]$Pos)))/((ChrSize-((d*10000)-10000))*SampleFactor)
PartialGraphing[d,]$Position<-ChrSize
PartialGraphing[d,]$SNVDensity<-SNVDensity
PartialGraphing[d,]$Chr<-ChrName
PartialGraphing[d,]$subChr<-subChromosomes[c]
}
}
Graphing<-rbind(Graphing,PartialGraphing)
}
}
SNVDensity_ymax<-max(Graphing$SNVDensity)
TotalMax<-max(c(TotalMax,SNVDensity_ymax))
Graphing$Position<-Graphing$Position/1000000
#g6<-ggplot(Graphing[Graphing$Chr == "Chr1",]) + geom_point(aes(x=Position,y=SNVDensity),size=0.1) + theme_bw() + facet_grid(~subChr,scales="free",space="free") + ggtitle("A") + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Position (Mb)")+scale_x_continuous(breaks = trans_breaks(identity, identity, n = 3),expand = c(0,0))+scale_y_continuous(expand = c(0,0),limits=c(0,TotalMaxPreCalc))
#g7<-ggplot(Graphing[Graphing$Chr == "Chr2",]) + geom_point(aes(x=Position,y=SNVDensity),size=0.1) + theme_bw() + facet_grid(~subChr,scales="free",space="free") + ggtitle("B") + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Position (Mb)")+scale_x_continuous(breaks = trans_breaks(identity, identity, n = 3),expand = c(0,0))+scale_y_continuous(expand = c(0,0),limits=c(0,TotalMaxPreCalc))
#g8<-ggplot(Graphing[Graphing$Chr == "Chr3",]) + geom_point(aes(x=Position,y=SNVDensity),size=0.1) + theme_bw() + facet_grid(~subChr,scales="free",space="free") + ggtitle("C") + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Position (Mb)")+scale_x_continuous(breaks = trans_breaks(identity, identity, n = 3),expand = c(0,0))+scale_y_continuous(expand = c(0,0),limits=c(0,TotalMaxPreCalc))
#g9<-ggplot(Graphing[Graphing$Chr == "Chr4",]) + geom_point(aes(x=Position,y=SNVDensity),size=0.1) + theme_bw() + facet_grid(~subChr,scales="free",space="free") + ggtitle("D") + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Position (Mb)")+scale_x_continuous(breaks = trans_breaks(identity, identity, n = 3),expand = c(0,0))+scale_y_continuous(expand = c(0,0),limits=c(0,TotalMaxPreCalc))
#g10<-ggplot(Graphing[Graphing$Chr == "ChrX",]) + geom_point(aes(x=Position,y=SNVDensity),size=0.1) + theme_bw() + facet_grid(~subChr,scales="free",space="free") + ggtitle("E") + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Position (Mb)")+scale_x_continuous(breaks = trans_breaks(identity, identity, n = 3),expand = c(0,0))+scale_y_continuous(expand = c(0,0),limits=c(0,TotalMaxPreCalc))
g6<-ggplot(Graphing[Graphing$Chr == "Chr1",]) + geom_point(aes(x=Position,y=SNVDensity),size=0.1) + theme_bw() + facet_grid(~subChr,scales="free",space="free") + ggtitle(paste(BMGroups[a],":A",sep="")) + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Position (Mb)")+scale_x_continuous(breaks = trans_breaks(identity, identity, n = 3),expand = c(0,0))+scale_y_continuous(expand = c(0,0),limits=c(0,TotalMaxPreCalc))
g7<-ggplot(Graphing[Graphing$Chr == "Chr2",]) + geom_point(aes(x=Position,y=SNVDensity),size=0.1) + theme_bw() + facet_grid(~subChr,scales="free",space="free") + ggtitle(paste(BMGroups[a],":B",sep="")) + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Position (Mb)")+scale_x_continuous(breaks = trans_breaks(identity, identity, n = 3),expand = c(0,0))+scale_y_continuous(expand = c(0,0),limits=c(0,TotalMaxPreCalc))
g8<-ggplot(Graphing[Graphing$Chr == "Chr3",]) + geom_point(aes(x=Position,y=SNVDensity),size=0.1) + theme_bw() + facet_grid(~subChr,scales="free",space="free") + ggtitle(paste(BMGroups[a],":C",sep="")) + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Position (Mb)")+scale_x_continuous(breaks = trans_breaks(identity, identity, n = 3),expand = c(0,0))+scale_y_continuous(expand = c(0,0),limits=c(0,TotalMaxPreCalc))
g9<-ggplot(Graphing[Graphing$Chr == "Chr4",]) + geom_point(aes(x=Position,y=SNVDensity),size=0.1) + theme_bw() + facet_grid(~subChr,scales="free",space="free") + ggtitle(paste(BMGroups[a],":D",sep="")) + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Position (Mb)")+scale_x_continuous(breaks = trans_breaks(identity, identity, n = 3),expand = c(0,0))+scale_y_continuous(expand = c(0,0),limits=c(0,TotalMaxPreCalc))
g10<-ggplot(Graphing[Graphing$Chr == "ChrX",]) + geom_point(aes(x=Position,y=SNVDensity),size=0.1) + theme_bw() + facet_grid(~subChr,scales="free",space="free") + ggtitle(paste(BMGroups[a],":E",sep="")) + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Position (Mb)")+scale_x_continuous(breaks = trans_breaks(identity, identity, n = 3),expand = c(0,0))+scale_y_continuous(expand = c(0,0),limits=c(0,TotalMaxPreCalc))
#print(ggarrange(g1,g2,g3,g4,g5,ncol=1,nrow=5,common.legend = TRUE,legend = "bottom"))
gl<-list(g6,g7,g8,g9,g10)
print(grid.arrange(grobs = gl, widths = c(5,5),layout_matrix = rbind(c(1,2),c(3,4),c(5,5))))
BMalayiGraphing<-Graphing
#BMalayiGraphingStorage<-BMalayiGraphing
}
dev.off()
g6<-ggplot(Graphing[Graphing$Chr == "Chr1",]) + geom_point(aes(x=Position,y=Pi),size=1) + theme_bw() + facet_grid(~subChr,scales="free",space="free") + ggtitle(paste(BMGroups[a],":Chr1",sep="")) + theme(legend.position="none",plot.title = element_text(size=24,face = "bold"),text = element_text(size=20),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Position (Mb)")+scale_x_continuous(breaks = trans_breaks(identity, identity, n = 3),expand = c(0,0))+scale_y_continuous(expand = c(0,0),limits=c(0,TotalMaxPreCalc))
g7<-ggplot(Graphing[Graphing$Chr == "Chr2",]) + geom_point(aes(x=Position,y=Pi),size=1) + theme_bw() + facet_grid(~subChr,scales="free",space="free") + ggtitle(paste(BMGroups[a],":Chr2",sep="")) + theme(legend.position="none",plot.title = element_text(size=24,face = "bold"),text = element_text(size=20),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Position (Mb)")+scale_x_continuous(breaks = trans_breaks(identity, identity, n = 3),expand = c(0,0))+scale_y_continuous(expand = c(0,0),limits=c(0,TotalMaxPreCalc))
g8<-ggplot(Graphing[Graphing$Chr == "Chr3",]) + geom_point(aes(x=Position,y=Pi),size=1) + theme_bw() + facet_grid(~subChr,scales="free",space="free") + ggtitle(paste(BMGroups[a],":Chr3",sep="")) + theme(legend.position="none",plot.title = element_text(size=24,face = "bold"),text = element_text(size=20),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Position (Mb)")+scale_x_continuous(breaks = trans_breaks(identity, identity, n = 3),expand = c(0,0))+scale_y_continuous(expand = c(0,0),limits=c(0,TotalMaxPreCalc))
g9<-ggplot(Graphing[Graphing$Chr == "Chr4",]) + geom_point(aes(x=Position,y=Pi),size=1) + theme_bw() + facet_grid(~subChr,scales="free",space="free") + ggtitle(paste(BMGroups[a],":Chr4",sep="")) + theme(legend.position="none",plot.title = element_text(size=24,face = "bold"),text = element_text(size=20),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Position (Mb)")+scale_x_continuous(breaks = trans_breaks(identity, identity, n = 3),expand = c(0,0))+scale_y_continuous(expand = c(0,0),limits=c(0,TotalMaxPreCalc))
g10<-ggplot(Graphing[Graphing$Chr == "ChrX",]) + geom_point(aes(x=Position,y=Pi),size=1) + theme_bw() + facet_grid(~subChr,scales="free",space="free") + ggtitle(paste(BMGroups[a],":ChrX",sep="")) + theme(legend.position="none",plot.title = element_text(size=24,face = "bold"),text = element_text(size=20),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Position (Mb)")+scale_x_continuous(breaks = trans_breaks(identity, identity, n = 3),expand = c(0,0))+scale_y_continuous(expand = c(0,0),limits=c(0,TotalMaxPreCalc))
pdf("/Users/jmattick/Documents/BMChr1.pdf",width = 20,height = 10,useDingbats=FALSE)
print(g6)
dev.off()
pdf("/Users/jmattick/Documents/BMChr2.pdf",width = 20,height = 10,useDingbats=FALSE)
print(g7)
dev.off()
pdf("/Users/jmattick/Documents/BMChr3.pdf",width = 20,height = 10,useDingbats=FALSE)
print(g8)
dev.off()
pdf("/Users/jmattick/Documents/BMChr4.pdf",width = 20,height = 10,useDingbats=FALSE)
print(g9)
dev.off()
pdf("/Users/jmattick/Documents/BMChrX.pdf",width = 20,height = 10,useDingbats=FALSE)
print(g10)
dev.off()
#BMalayiGraphing<-BMalayiGraphingStorage
###Wuchereria Plotting
#pdf("/Users/jmattick/Documents/WBancroftiPlottingAll.pdf",width = 9,height = 15,useDingbats=FALSE)
pdf("/Users/jmattick/Documents/WBancroftiPlotting_lowmax.pdf",width = 20,height = 10,useDingbats=FALSE)
#TotalMaxPreCalc<-0.075
TotalMaxPreCalc<-0.025
###WB Plotting
#WBGroups<-c("Haiti","Kenya","Mali","PNG")
WBGroups<-c("All")
TotalMax<-c()
for (a in 1:length(WBGroups))
{
Graphing<-data.frame()
if(WBGroups[a] == "Haiti"){
subWBGroups<-WbFileList[1:4]
}else if(WBGroups[a] == "Kenya"){
subWBGroups<-WbFileList[5:8]
}else if(WBGroups[a] == "Mali"){
subWBGroups<-WbFileList[9:12]
}else if(WBGroups[a] == "PNG"){
subWBGroups<-WbFileList[13:16]
}else if(WBGroups[a] == "All"){
subWBGroups<-WbFileList
}
TotalSNPs<-data.frame()
SampleFactor<-length(subWBGroups)
for (b in 1:SampleFactor)
{
TempSNP<-as.data.frame(read.delim(paste("/Volumes/projects-t3/EBMAL/SNP_MALE_GATK_Best_Practices/Masked_SNP_Tables_All/WBancrofti/",subWBGroups[b],sep=""), sep="\t", header=FALSE,stringsAsFactors = FALSE))
colnames(TempSNP)<-c("Chr","Pos","Zyg")
TotalSNPs<-rbind(TempSNP,TotalSNPs)
}
ChrList<-Bm.chr.res$V1
for (b in 1:length(ChrList))
{
ChrName<-strsplit(Bm.chr.res[b,]$V1,"_")[[1]][3]
subContigs<-WuchAssignments_Chrs[grep(ChrName,WuchAssignments_Chrs$Chr),]
subContigs<-subContigs[order(subContigs$Start),]
sumpos<-0
for (c in 1:length(subContigs$Contig))
{
ChrSize<-as.numeric(Wb.res[Wb.res$V1 == subContigs[c,]$Contig,]$V2)
subSNPs<-TotalSNPs[TotalSNPs$Chr == subContigs[c,]$Contig,]
PartialGraphing<-data.frame(matrix(ncol=5,nrow=ceiling(ChrSize/10000)),stringsAsFactors = FALSE)
colnames(PartialGraphing)<-c("Position","SumPosition","Pi","Chr","subChr")
for (d in 1:ceiling(ChrSize/10000))
{
if(d<ceiling(ChrSize/10000)){
pi<-(length(subSNPs[subSNPs$Pos >= ((d*10000)-9999) & subSNPs$Pos <= (d*10000) & subSNPs$Zyg == "het",]$Pos)+(2*length(subSNPs[subSNPs$Pos >= ((d*10000)-9999) & subSNPs$Pos <= (d*10000) & subSNPs$Zyg == "hom",]$Pos)))/(20000*SampleFactor)
PartialGraphing[d,]$Position<-(d*10000)
PartialGraphing[d,]$SumPosition<-(d*10000)+sumpos
PartialGraphing[d,]$Pi<-pi
PartialGraphing[d,]$Chr<-ChrName
PartialGraphing[d,]$subChr<-as.character(subContigs[c,]$Contig)
} else {
pi<-(length(subSNPs[subSNPs$Pos >= ((d*10000)-9999) & subSNPs$Pos <= ChrSize & subSNPs$Zyg == "het",]$Pos)+(2*length(subSNPs[subSNPs$Pos >= ((d*10000)-9999) & subSNPs$Pos <= (d*10000) & subSNPs$Zyg == "hom",]$Pos)))/((ChrSize-((d*10000)-10000))*SampleFactor)
PartialGraphing[d,]$Position<-ChrSize
PartialGraphing[d,]$SumPosition<-ChrSize+sumpos
PartialGraphing[d,]$Pi<-pi
PartialGraphing[d,]$Chr<-ChrName
PartialGraphing[d,]$subChr<-as.character(subContigs[c,]$Contig)
}
}
sumpos<-sumpos+ChrSize
Graphing<-rbind(Graphing,PartialGraphing)
if(length(Graphing[is.na(Graphing$Pi),]$Pi) > 0){
stop()
}
}
}
pi_ymax<-max(Graphing$Pi)
TotalMax<-c(TotalMax,pi_ymax)
Graphing$Position<-Graphing$Position/1000000
Graphing$SumPosition<-Graphing$SumPosition/1000000
g1<-ggplot(Graphing[Graphing$Chr == "Chr1",]) + geom_point(aes(x=SumPosition,y=Pi),size=0.1) + theme_bw() + facet_grid(~Chr,scales="free",space="free") + ggtitle("A") + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Position (Mb)")+scale_x_continuous(breaks = trans_breaks(identity, identity, n = 3),expand = c(0,0))+scale_y_continuous(expand = c(0,0),limits=c(0,TotalMaxPreCalc))
g2<-ggplot(Graphing[Graphing$Chr == "Chr2",]) + geom_point(aes(x=SumPosition,y=Pi),size=0.1) + theme_bw() + facet_grid(~Chr,scales="free",space="free") + ggtitle("B") + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Position (Mb)")+scale_x_continuous(breaks = trans_breaks(identity, identity, n = 3),expand = c(0,0))+scale_y_continuous(expand = c(0,0),limits=c(0,TotalMaxPreCalc))
g3<-ggplot(Graphing[Graphing$Chr == "Chr3",]) + geom_point(aes(x=SumPosition,y=Pi),size=0.1) + theme_bw() + facet_grid(~Chr,scales="free",space="free") + ggtitle("C") + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Position (Mb)")+scale_x_continuous(breaks = trans_breaks(identity, identity, n = 3),expand = c(0,0))+scale_y_continuous(expand = c(0,0),limits=c(0,TotalMaxPreCalc))
g4<-ggplot(Graphing[Graphing$Chr == "Chr4",]) + geom_point(aes(x=SumPosition,y=Pi),size=0.1) + theme_bw() + facet_grid(~Chr,scales="free",space="free") + ggtitle("D") + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Position (Mb)")+scale_x_continuous(breaks = trans_breaks(identity, identity, n = 3),expand = c(0,0))+scale_y_continuous(expand = c(0,0),limits=c(0,TotalMaxPreCalc))
g5<-ggplot(Graphing[Graphing$Chr == "ChrX",]) + geom_point(aes(x=SumPosition,y=Pi),size=0.1) + theme_bw() + facet_grid(~Chr,scales="free",space="free") + ggtitle("E") + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Position (Mb)")+scale_x_continuous(breaks = trans_breaks(identity, identity, n = 3),expand = c(0,0))+scale_y_continuous(expand = c(0,0),limits=c(0,TotalMaxPreCalc))
#print(ggarrange(g1,g2,g3,g4,g5,ncol=1,nrow=5,common.legend = TRUE,legend = "bottom"))
gl<-list(g1,g2,g3,g4,g5)
print(grid.arrange(grobs = gl, widths = c(5,5),layout_matrix = rbind(c(1,2),c(3,4),c(5,5))))
WBancroftiGraphing<-Graphing
}
dev.off()
pdf("/Users/jmattick/Documents/BTimoriPlotting.pdf",width = 20,height = 10,useDingbats=FALSE)
TotalMaxPreCalc<-0.025
TotalMax<-c()
for (a in 1:length(WBGroups))
{
Graphing<-data.frame()
subBTGroups<-BtFileList
TotalSNPs<-data.frame()
SampleFactor<-length(subBTGroups)
for (b in 1:SampleFactor)
{
TempSNP<-as.data.frame(read.delim(paste("/Volumes/projects-t3/EBMAL/SNP_MALE_GATK_Best_Practices/Masked_SNP_Tables_All/BTimori/",subBTGroups[b],sep=""), sep="\t", header=FALSE,stringsAsFactors = FALSE))
colnames(TempSNP)<-c("Chr","Pos","Zyg")
TotalSNPs<-rbind(TempSNP,TotalSNPs)
}
ChrList<-Bm.chr.res$V1
for (b in 1:length(ChrList))
{
ChrName<-strsplit(Bm.chr.res[b,]$V1,"_")[[1]][3]
subContigs<-TimAssignments_Chrs[grep(ChrName,TimAssignments_Chrs$Chr),]
subContigs<-subContigs[order(subContigs$Start),]
sumpos<-0
for (c in 1:length(subContigs$Contig))
{
ChrSize<-as.numeric(BT.res[BT.res$V1 == subContigs[c,]$Contig,]$V2)
subSNPs<-TotalSNPs[TotalSNPs$Chr == subContigs[c,]$Contig,]
PartialGraphing<-data.frame(matrix(ncol=5,nrow=ceiling(ChrSize/10000)),stringsAsFactors = FALSE)
colnames(PartialGraphing)<-c("Position","SumPosition","Pi","Chr","subChr")
for (d in 1:ceiling(ChrSize/10000))
{
if(d<ceiling(ChrSize/10000)){
pi<-(length(subSNPs[subSNPs$Pos >= ((d*10000)-9999) & subSNPs$Pos <= (d*10000) & subSNPs$Zyg == "het",]$Pos)+(2*length(subSNPs[subSNPs$Pos >= ((d*10000)-9999) & subSNPs$Pos <= (d*10000) & subSNPs$Zyg == "hom",]$Pos)))/(20000*SampleFactor)
PartialGraphing[d,]$Position<-(d*10000)
PartialGraphing[d,]$SumPosition<-(d*10000)+sumpos
PartialGraphing[d,]$Pi<-pi
PartialGraphing[d,]$Chr<-ChrName
PartialGraphing[d,]$subChr<-as.character(subContigs[c,]$Contig)
} else {
pi<-(length(subSNPs[subSNPs$Pos >= ((d*10000)-9999) & subSNPs$Pos <= ChrSize & subSNPs$Zyg == "het",]$Pos)+(2*length(subSNPs[subSNPs$Pos >= ((d*10000)-9999) & subSNPs$Pos <= (d*10000) & subSNPs$Zyg == "hom",]$Pos)))/((ChrSize-((d*10000)-10000))*SampleFactor)
PartialGraphing[d,]$Position<-ChrSize
PartialGraphing[d,]$SumPosition<-ChrSize+sumpos
PartialGraphing[d,]$Pi<-pi
PartialGraphing[d,]$Chr<-ChrName
PartialGraphing[d,]$subChr<-as.character(subContigs[c,]$Contig)
}
}
sumpos<-sumpos+ChrSize
Graphing<-rbind(Graphing,PartialGraphing)
if(length(Graphing[is.na(Graphing$Pi),]$Pi) > 0){
stop()
}
}
}
pi_ymax<-max(Graphing$Pi)
TotalMax<-c(TotalMax,pi_ymax)
TotalMaxPreCalc<-pi_ymax
Graphing$Position<-Graphing$Position/1000000
Graphing$SumPosition<-Graphing$SumPosition/1000000
g1<-ggplot(Graphing[Graphing$Chr == "Chr1",]) + geom_point(aes(x=SumPosition,y=Pi),size=0.1) + theme_bw() + facet_grid(~Chr,scales="free",space="free") + ggtitle("A") + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Position (Mb)")+scale_x_continuous(breaks = trans_breaks(identity, identity, n = 3),expand = c(0,0))+scale_y_continuous(expand = c(0,0),limits=c(0,TotalMaxPreCalc))
g2<-ggplot(Graphing[Graphing$Chr == "Chr2",]) + geom_point(aes(x=SumPosition,y=Pi),size=0.1) + theme_bw() + facet_grid(~Chr,scales="free",space="free") + ggtitle("B") + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Position (Mb)")+scale_x_continuous(breaks = trans_breaks(identity, identity, n = 3),expand = c(0,0))+scale_y_continuous(expand = c(0,0),limits=c(0,TotalMaxPreCalc))
g3<-ggplot(Graphing[Graphing$Chr == "Chr3",]) + geom_point(aes(x=SumPosition,y=Pi),size=0.1) + theme_bw() + facet_grid(~Chr,scales="free",space="free") + ggtitle("C") + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Position (Mb)")+scale_x_continuous(breaks = trans_breaks(identity, identity, n = 3),expand = c(0,0))+scale_y_continuous(expand = c(0,0),limits=c(0,TotalMaxPreCalc))
g4<-ggplot(Graphing[Graphing$Chr == "Chr4",]) + geom_point(aes(x=SumPosition,y=Pi),size=0.1) + theme_bw() + facet_grid(~Chr,scales="free",space="free") + ggtitle("D") + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Position (Mb)")+scale_x_continuous(breaks = trans_breaks(identity, identity, n = 3),expand = c(0,0))+scale_y_continuous(expand = c(0,0),limits=c(0,TotalMaxPreCalc))
g5<-ggplot(Graphing[Graphing$Chr == "ChrX",]) + geom_point(aes(x=SumPosition,y=Pi),size=0.1) + theme_bw() + facet_grid(~Chr,scales="free",space="free") + ggtitle("E") + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Position (Mb)")+scale_x_continuous(breaks = trans_breaks(identity, identity, n = 3),expand = c(0,0))+scale_y_continuous(expand = c(0,0),limits=c(0,TotalMaxPreCalc))
#print(ggarrange(g1,g2,g3,g4,g5,ncol=1,nrow=5,common.legend = TRUE,legend = "bottom"))
gl<-list(g1,g2,g3,g4,g5)
print(grid.arrange(grobs = gl, widths = c(5,5),layout_matrix = rbind(c(1,2),c(3,4),c(5,5))))
BTimoriGraphing<-Graphing
}
dev.off()
####G-Pho Sequence File Sorting
BMalayiGraphing_GPHO<-BMalayiGraphing
BMalayiGraphing_GPHO$Position<-BMalayiGraphing_GPHO$Position*1000000
BMalayiGraphing_GPHO<-BMalayiGraphing_GPHO[order(-BMalayiGraphing_GPHO$Pi),]
BMalayiGraphing_GPHO<-BMalayiGraphing_GPHO[BMalayiGraphing_GPHO$Chr != "ChrX",]
BMalayiGraphing_GPHO<-BMalayiGraphing_GPHO[1:1000,]
GPho_Coords<-data.frame(BMalayiGraphing_GPHO$Chr,(BMalayiGraphing_GPHO$Position-9999),BMalayiGraphing_GPHO$Position)
colnames(GPho_Coords)<-c("Chr","Start","Stop")
GPho_Coords$Start <- format(GPho_Coords$Start, scientific = FALSE)
GPho_Coords$Stop <- format(GPho_Coords$Stop, scientific = FALSE)
write.table(GPho_Coords,"/Users/jmattick/Documents/GPhoCoords.bed",row.names = FALSE,col.names = FALSE,sep = "\t",quote = FALSE)
###PCA Plots
##BP
Bp.vec<-as.data.frame(read.delim("/Volumes/projects-t3/EBMAL/SNP_MALE_GATK_Best_Practices/Brugia_SNP_Tables/PCA/BP.plink2.eigenvec",header = TRUE,stringsAsFactors = FALSE))
Bp.val<-as.data.frame(read.delim("/Volumes/projects-t3/EBMAL/SNP_MALE_GATK_Best_Practices/Brugia_SNP_Tables/PCA/BP.plink2.eigenval",header = FALSE,stringsAsFactors = FALSE))
Bp.vec.subset<-Bp.vec[,1:4]
Bp.vec.subset$SampleGroup<-c("MultiAF",rep("FR3",3),rep("Clinical",3),rep("FR3",4))
#print(ggplot(gene345) + geom_point(aes(x=miRNA,y=mRNA,color=Sample,size=5)) + theme_bw() + ggtitle("WB_new_34 miRNA Depth vs mRNA depth") + theme(plot.title = element_text(size=14)) + xlim(0,7000) + ylim(0,2000) + scale_color_manual(values = colors)+ theme(legend.text=element_text(size=12),legend.key.size = unit(12,"point")))
p1<-ggplot(Bp.vec.subset) + geom_point(aes(x=PC1,y=PC2,shape=SampleGroup,color=SampleGroup),size=3) + theme_bw() + xlab(paste("PC1: ",round(as.numeric(Bp.val[1,1]),1),"%",sep="")) + ylab(paste("PC2: ",round(as.numeric(Bp.val[2,1]),1),"%",sep=""))+ theme(legend.text=element_text(size=12),legend.key.size = unit(12,"point"),axis.text=element_text(size=16),axis.title=element_text(size=16))
pdf("/Users/jmattick/Documents/BPPCa.pdf")
print(p1)
dev.off()
Bp.vec.noclin<-as.data.frame(read.delim("/Volumes/projects-t3/EBMAL/SNP_MALE_GATK_Best_Practices/Brugia_SNP_Tables/PCA/BP.plink2.noclin.eigenvec",header = TRUE,stringsAsFactors = FALSE))
Bp.val.noclin<-as.data.frame(read.delim("/Volumes/projects-t3/EBMAL/SNP_MALE_GATK_Best_Practices/Brugia_SNP_Tables/PCA/BP.plink2.noclin.eigenval",header = FALSE,stringsAsFactors = FALSE))
Bp.vec.subset.noclin<-Bp.vec.noclin[,1:4]
Bp.vec.subset.noclin$SampleGroup<-c("MultiAF",rep("AM",3),rep("FR3",4))
p2<-ggplot(Bp.vec.subset.noclin) + geom_point(aes(x=PC1,y=PC2,shape=SampleGroup,color=SampleGroup),size=4) + theme_bw() + xlab(paste("PC1: ",round(as.numeric(Bp.val.noclin[1,1]),1),"%",sep="")) + ylab(paste("PC2: ",round(as.numeric(Bp.val.noclin[2,1]),1),"%",sep=""))
Bm.vec<-as.data.frame(read.delim("/Volumes/projects-t3/EBMAL/SNP_MALE_GATK_Best_Practices/Brugia_SNP_Tables/PCA/BM.plink2.eigenvec",header = TRUE,stringsAsFactors = FALSE))
Bm.val<-as.data.frame(read.delim("/Volumes/projects-t3/EBMAL/SNP_MALE_GATK_Best_Practices/Brugia_SNP_Tables/PCA/BM.plink2.eigenval",header = FALSE,stringsAsFactors = FALSE))
Bm.vec.subset<-Bm.vec[,1:6]
Bm.vec.subset$SampleGroup<-c(rep("Lucknow",4),rep("TRS",2),rep("Thailand",4),rep("FR3",4),rep("TRS",2),rep("Liverpool",4),rep("WashU",6))
p3<-ggplot(Bm.vec.subset) + geom_point(aes(x=PC1,y=PC2,shape=SampleGroup,color=SampleGroup),size=3) + theme_bw() + xlab(paste("PC1: ",round(as.numeric(Bm.val[1,1]),1),"%",sep="")) + ylab(paste("PC2: ",round(as.numeric(Bm.val[2,1]),1),"%",sep=""))+ theme(legend.text=element_text(size=12),legend.key.size = unit(12,"point"),axis.text=element_text(size=16),axis.title=element_text(size=16))
p4<-ggplot(Bm.vec.subset) + geom_point(aes(x=PC2,y=PC3,shape=SampleGroup,color=SampleGroup),size=1) + theme_bw() + xlab(paste("PC2: ",round(as.numeric(Bm.val[2,1]),1),"%",sep="")) + ylab(paste("PC3: ",round(as.numeric(Bm.val[3,1]),1),"%",sep=""))
p5<-ggplot(Bm.vec.subset) + geom_point(aes(x=PC3,y=PC4,shape=SampleGroup,color=SampleGroup),size=1) + theme_bw() + xlab(paste("PC3: ",round(as.numeric(Bm.val[3,1]),1),"%",sep="")) + ylab(paste("PC4: ",round(as.numeric(Bm.val[4,1]),1),"%",sep=""))
pdf("/Users/jmattick/Documents/BMPCa.pdf")
print(p3)
dev.off()
pdf("/Users/jmattick/Documents/Pop.Genome.Figure3.pdf",useDingbats=FALSE)
print(p1)
dev.off()
pdf("/Users/jmattick/Documents/Pop.Genome.Figure4.pdf",useDingbats=FALSE)
print(p3)
dev.off()
pdf("/Users/jmattick/Documents/Pop.Genome.All_BMPCAs.pdf",useDingbats=FALSE)
print(p3)
print(p4)
print(p5)
dev.off()
###Box Plots
##BP vs BM
BMalayiGraphing$Species<-"BMalayi"
BPahangiGraphing$Species<-"BPahangi"
TotalGraphing<-rbind(BMalayiGraphing,BPahangiGraphing)
pdf("/Users/jmattick/Documents/Pop.Genome.FigureE.pdf",useDingbats=FALSE)
ggplot(TotalGraphing, aes(x=Species, y=Pi,color=Species)) + geom_boxplot(notch=TRUE) + ggtitle(expression(paste("SNV Density between ",italic("B. malayi")," and ",italic("B. pahangi"),sep=""))) + theme_bw()
dev.off()
TotalGraphing$Region<-"NonX"
TotalGraphing[TotalGraphing$Species == "BMalayi" & TotalGraphing$Chr == "ChrX" & TotalGraphing$Pos > 4 & TotalGraphing$Pos < 20,]$Region<-"CentralX"
TotalGraphing[TotalGraphing$Species == "BPahangi" & TotalGraphing$subChr == "BP_ChrX_b" & TotalGraphing$Pos > 1 & TotalGraphing$Pos < 17,]$Region<-"CentralX"
q1<-ggplot(TotalGraphing[TotalGraphing$Species == "BMalayi",], aes(x=Pi,color=Region)) + geom_line(stat="density") + ggtitle(expression(paste("SNV Density between non-X and X regions in ",italic("B. malayi"),sep=""))) + theme_bw() + xlim(0,0.0025) + ylab("10kb Windows")
q2<-ggplot(TotalGraphing[TotalGraphing$Species == "BPahangi",], aes(x=Pi,color=Region)) + geom_line(stat="density") + ggtitle(expression(paste("SNV Density between non-X and X regions in ",italic("B. pahangi"),sep=""))) + theme_bw() + xlim(0,0.01) + ylab("10kb Windows")
gl<-list(q1,q2)
pdf("/Users/jmattick/Documents/Pop.Genome.Density.pdf",useDingbats=FALSE)
print(grid.arrange(grobs = gl))
dev.off()
print(mean(TotalGraphing[TotalGraphing$Species == "BMalayi" & TotalGraphing$Region == "CentralX",]$Pi)/mean(TotalGraphing[TotalGraphing$Species == "BMalayi" & TotalGraphing$Region == "NonX",]$Pi))
print(mean(TotalGraphing[TotalGraphing$Species == "BPahangi" & TotalGraphing$Region == "CentralX",]$Pi)/mean(TotalGraphing[TotalGraphing$Species == "BPahangi" & TotalGraphing$Region == "NonX",]$Pi))
###X vs Non X Density
BMDensityGraphing<-BMalayiGraphing
BMDensityGraphing$Region<-"NonX"
BMDensityGraphing[BMDensityGraphing$Chr == "ChrX",]$Region<-"X"
q1<-ggplot(BMDensityGraphing, aes(x=Pi,color=Region)) + geom_line(stat="density") + ggtitle(expression(paste("SNV Density between non-X and X regions in ",italic("B. malayi"),sep=""))) + theme_bw() + xlim(0,0.0025) + ylab("10kb Windows")
pdf("/Users/jmattick/Documents/Pop.Genome.BM.Density.pdf",useDingbats=FALSE,height=4,width=8)
print(q1)
dev.off()
BPDensityGraphing<-BPahangiGraphing
BPDensityGraphing$Region<-"NonX"
BPDensityGraphing[BPDensityGraphing$Chr == "ChrX",]$Region<-"X"
q2<-ggplot(BPDensityGraphing, aes(x=Pi,color=Region)) + geom_line(stat="density") + ggtitle(expression(paste("SNV Density between non-X and X regions in ",italic("B. pahangi"),sep=""))) + theme_bw() + xlim(0,0.0025) + ylab("10kb Windows")
pdf("/Users/jmattick/Documents/Pop.Genome.BP.Density.pdf",useDingbats=FALSE,height=4,width=8)
print(q2)
dev.off()
WBDensityGraphing<-WBancroftiGraphing
WBDensityGraphing$Region<-"NonX"
WBDensityGraphing[WBDensityGraphing$Chr == "ChrX",]$Region<-"X"
q3<-ggplot(WBDensityGraphing, aes(x=Pi,color=Region)) + geom_line(stat="density") + ggtitle(expression(paste("SNV Density between non-X and X regions in ",italic("W. bancrofti"),sep=""))) + theme_bw() + xlim(0,0.0025) + ylab("10kb Windows")
pdf("/Users/jmattick/Documents/Pop.Genome.WB.Density.pdf",useDingbats=FALSE,height=4,width=8)
print(q3)
dev.off()
BTDensityGraphing<-BTimoriGraphing
BTDensityGraphing$Region<-"NonX"
BTDensityGraphing[BTDensityGraphing$Chr == "ChrX",]$Region<-"X"
q4<-ggplot(BTDensityGraphing, aes(x=Pi,color=Region)) + geom_line(stat="density") + ggtitle(expression(paste("SNV Density between non-X and X regions in ",italic("B. timori"),sep=""))) + theme_bw() + xlim(0,0.01) + ylab("10kb Windows")
pdf("/Users/jmattick/Documents/Pop.Genome.BT.Density.pdf",useDingbats=FALSE,height=4,width=8)
print(q4)
dev.off()
gl<-list(q1,q2,q3,q4)
pdf("/Users/jmattick/Documents/Pop.Genome.BMBPWBBT.Density.pdf",useDingbats=FALSE)
print(grid.arrange(grobs = gl))
dev.off()
######FASTA FILE ANALYSIS: GC Content
library("seqinr")
brugia.unmasked <- read.fasta(file = "/Volumes/projects-t3/EBMAL/JMattick_miRNA_Wolbachia/Bm.v4.all.fa")
brugia.masked <- read.fasta(file = "/Volumes/projects-t3/EBMAL/JMattick_miRNA_Wolbachia/brugia_malayi.PRJNA10729.WBPS14.genomic_masked.fa")
brugia.genemasked <- read.fasta(file = "/Volumes/projects-t3/EBMAL/JMattick_miRNA_Wolbachia/Brugia.malayi.WGS.genemasked.fa")
AllNames<-getName(brugia.unmasked)
ChrNameVector<-grep("Chr",AllNames)
Graphing<-data.frame()
for (a in ChrNameVector)
{
subFASTA<-brugia.unmasked[[a]]
ChrSize<-length(getSequence(subFASTA))
ChrFullName<-getName(subFASTA)
ChrName<-strsplit(ChrFullName,"_")[[1]][3]
submaskedFASTA<-brugia.masked[[grep(ChrFullName,getName(brugia.masked))]]
subgenemaskedFASTA<-brugia.genemasked[[grep(ChrFullName,getName(brugia.genemasked))]]
PartialGraphing<-data.frame(matrix(ncol=5,nrow=ceiling(ChrSize/10000)),stringsAsFactors = FALSE)
colnames(PartialGraphing)<-c("Chr","Position","GC","Repeats","Genes")
for (d in 1:ceiling(ChrSize/10000))
{
if(d<ceiling(ChrSize/10000)){
subGC<-GC(getFrag(subFASTA,((d*10000)-9999),(d*10000)))
if(is.na(subGC)){
subGC<-0
subRepeatPerc<-0
subGenePerc<-0
}else{
subRepeatTable<-as.data.frame(table(getFrag(submaskedFASTA,((d*10000)-9999),(d*10000))[1:length(getFrag(submaskedFASTA,((d*10000)-9999),(d*10000)))]))
if(length(subRepeatTable[subRepeatTable$Var1 == "n",]$Freq) > 0){
subRepeatPerc<-100*as.numeric(subRepeatTable[subRepeatTable$Var1 == "n",]$Freq)/10000
}else{
subRepeatPerc<-0
}
subGeneTable<-as.data.frame(table(getFrag(subgenemaskedFASTA,((d*10000)-9999),(d*10000))[1:length(getFrag(subgenemaskedFASTA,((d*10000)-9999),(d*10000)))]))
if(length(subGeneTable[subGeneTable$Var1 == "n",]$Freq) > 0){
subGenePerc<-100*as.numeric(subGeneTable[subGeneTable$Var1 == "n",]$Freq)/10000
}else{
subGenePerc<-0
}
}
PartialGraphing[d,]$Position<-(d*10000)
PartialGraphing[d,]$Chr<-ChrName
PartialGraphing[d,]$GC<-100*subGC
PartialGraphing[d,]$Repeats<-subRepeatPerc
PartialGraphing[d,]$Genes<-subGenePerc
} else {
subGC<-GC(getFrag(subFASTA,((d*10000)-9999),ChrSize))
if(is.na(subGC)){
subGC<-0
subRepeatPerc<-0
subGenePerc<-0
}else{
subRepeatTable<-as.data.frame(table(getFrag(submaskedFASTA,((d*10000)-9999),ChrSize)[1:length(getFrag(submaskedFASTA,((d*10000)-9999),ChrSize))]))
if(length(subRepeatTable[subRepeatTable$Var1 == "n",]$Freq) > 0){
subRepeatPerc<-100*as.numeric(subRepeatTable[subRepeatTable$Var1 == "n",]$Freq)/(ChrSize-((d*10000)-10000))
}else{
subRepeatPerc<-0
}
subGeneTable<-as.data.frame(table(getFrag(subgenemaskedFASTA,((d*10000)-9999),ChrSize)[1:length(getFrag(subgenemaskedFASTA,((d*10000)-9999),ChrSize))]))
if(length(subGeneTable[subGeneTable$Var1 == "n",]$Freq) > 0){
subGenePerc<-100*as.numeric(subGeneTable[subGeneTable$Var1 == "n",]$Freq)/(ChrSize-((d*10000)-10000))
}else{
subGenePerc<-0
}
}
PartialGraphing[d,]$Position<-ChrSize
PartialGraphing[d,]$Chr<-ChrName
PartialGraphing[d,]$GC<-100*subGC
PartialGraphing[d,]$Repeats<-subRepeatPerc
PartialGraphing[d,]$Genes<-subGenePerc
}
}
Graphing<-rbind(Graphing,PartialGraphing)
}
GC_Graphing<-Graphing
write.table(GC_Graphing,"/Users/jmattick/Documents/GC_Graphing.tsv",row.names = FALSE,col.names = T,sep = "\t",quote = FALSE)
library(gridExtra)
library(grid)
library(ggplot2)
library(lattice)
pdf("/Users/jmattick/Documents/BMalayi.ChromosomeStats.pdf",useDingbats=FALSE)
####GC
g1<-ggplot(Graphing[Graphing$Chr == "Chr1",]) + geom_point(aes(x=Position,y=GC),size=0.1) + theme_bw() + facet_grid(~Chr,scales="free",space="free") + ggtitle("GC Plots: A") + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Position (Mb)")+scale_x_continuous(expand = c(0,0))+scale_y_continuous(expand = c(0,0),limits=c(0,50))
g2<-ggplot(Graphing[Graphing$Chr == "Chr2",]) + geom_point(aes(x=Position,y=GC),size=0.1) + theme_bw() + facet_grid(~Chr,scales="free",space="free") + ggtitle("GC Plots: B") + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Position (Mb)")+scale_x_continuous(expand = c(0,0))+scale_y_continuous(expand = c(0,0),limits=c(0,50))
g3<-ggplot(Graphing[Graphing$Chr == "Chr3",]) + geom_point(aes(x=Position,y=GC),size=0.1) + theme_bw() + facet_grid(~Chr,scales="free",space="free") + ggtitle("GC Plots: C") + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Position (Mb)")+scale_x_continuous(expand = c(0,0))+scale_y_continuous(expand = c(0,0),limits=c(0,50))
g4<-ggplot(Graphing[Graphing$Chr == "Chr4",]) + geom_point(aes(x=Position,y=GC),size=0.1) + theme_bw() + facet_grid(~Chr,scales="free",space="free") + ggtitle("GC Plots: D") + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Position (Mb)")+scale_x_continuous(expand = c(0,0))+scale_y_continuous(expand = c(0,0),limits=c(0,50))
g5<-ggplot(Graphing[Graphing$Chr == "ChrX",]) + geom_point(aes(x=Position,y=GC),size=0.1) + theme_bw() + facet_grid(~Chr,scales="free",space="free") + ggtitle("GC Plots: E") + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Position (Mb)")+scale_x_continuous(expand = c(0,0))+scale_y_continuous(expand = c(0,0),limits=c(0,50))
#print(ggarrange(g1,g2,g3,g4,g5,ncol=1,nrow=5,common.legend = TRUE,legend = "bottom"))
gl<-list(g1,g2,g3,g4,g5)
print(grid.arrange(grobs = gl, widths = c(5,5),layout_matrix = rbind(c(1,2),c(3,4),c(5,5))))
####Repeat Density
g1<-ggplot(Graphing[Graphing$Chr == "Chr1",]) + geom_point(aes(x=Position,y=Repeats),size=0.1) + theme_bw() + facet_grid(~Chr,scales="free",space="free") + ggtitle("Repeat Plots: A") + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Position (Mb)")+scale_x_continuous(expand = c(0,0))+scale_y_continuous(expand = c(0,0),limits=c(0,105))
g2<-ggplot(Graphing[Graphing$Chr == "Chr2",]) + geom_point(aes(x=Position,y=Repeats),size=0.1) + theme_bw() + facet_grid(~Chr,scales="free",space="free") + ggtitle("Repeat Plots: B") + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Position (Mb)")+scale_x_continuous(expand = c(0,0))+scale_y_continuous(expand = c(0,0),limits=c(0,105))
g3<-ggplot(Graphing[Graphing$Chr == "Chr3",]) + geom_point(aes(x=Position,y=Repeats),size=0.1) + theme_bw() + facet_grid(~Chr,scales="free",space="free") + ggtitle("Repeat Plots: C") + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Position (Mb)")+scale_x_continuous(expand = c(0,0))+scale_y_continuous(expand = c(0,0),limits=c(0,105))
g4<-ggplot(Graphing[Graphing$Chr == "Chr4",]) + geom_point(aes(x=Position,y=Repeats),size=0.1) + theme_bw() + facet_grid(~Chr,scales="free",space="free") + ggtitle("Repeat Plots: D") + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Position (Mb)")+scale_x_continuous(expand = c(0,0))+scale_y_continuous(expand = c(0,0),limits=c(0,105))
g5<-ggplot(Graphing[Graphing$Chr == "ChrX",]) + geom_point(aes(x=Position,y=Repeats),size=0.1) + theme_bw() + facet_grid(~Chr,scales="free",space="free") + ggtitle("Repeat Plots: E") + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Position (Mb)")+scale_x_continuous(expand = c(0,0))+scale_y_continuous(expand = c(0,0),limits=c(0,105))
#print(ggarrange(g1,g2,g3,g4,g5,ncol=1,nrow=5,common.legend = TRUE,legend = "bottom"))
gl<-list(g1,g2,g3,g4,g5)
print(grid.arrange(grobs = gl, widths = c(5,5),layout_matrix = rbind(c(1,2),c(3,4),c(5,5))))
####Repeat Density
g1<-ggplot(Graphing[Graphing$Chr == "Chr1",]) + geom_point(aes(x=Position,y=Genes),size=0.1) + theme_bw() + facet_grid(~Chr,scales="free",space="free") + ggtitle("Gene Plots: A") + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Position (Mb)")+scale_x_continuous(expand = c(0,0))+scale_y_continuous(expand = c(0,0),limits=c(0,105))
g2<-ggplot(Graphing[Graphing$Chr == "Chr2",]) + geom_point(aes(x=Position,y=Genes),size=0.1) + theme_bw() + facet_grid(~Chr,scales="free",space="free") + ggtitle("Gene Plots: B") + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Position (Mb)")+scale_x_continuous(expand = c(0,0))+scale_y_continuous(expand = c(0,0),limits=c(0,105))
g3<-ggplot(Graphing[Graphing$Chr == "Chr3",]) + geom_point(aes(x=Position,y=Genes),size=0.1) + theme_bw() + facet_grid(~Chr,scales="free",space="free") + ggtitle("Gene Plots: C") + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Position (Mb)")+scale_x_continuous(expand = c(0,0))+scale_y_continuous(expand = c(0,0),limits=c(0,105))
g4<-ggplot(Graphing[Graphing$Chr == "Chr4",]) + geom_point(aes(x=Position,y=Genes),size=0.1) + theme_bw() + facet_grid(~Chr,scales="free",space="free") + ggtitle("Gene Plots: D") + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Position (Mb)")+scale_x_continuous(expand = c(0,0))+scale_y_continuous(expand = c(0,0),limits=c(0,105))
g5<-ggplot(Graphing[Graphing$Chr == "ChrX",]) + geom_point(aes(x=Position,y=Genes),size=0.1) + theme_bw() + facet_grid(~Chr,scales="free",space="free") + ggtitle("Gene Plots: E") + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Position (Mb)")+scale_x_continuous(expand = c(0,0))+scale_y_continuous(expand = c(0,0),limits=c(0,105))
#print(ggarrange(g1,g2,g3,g4,g5,ncol=1,nrow=5,common.legend = TRUE,legend = "bottom"))
gl<-list(g1,g2,g3,g4,g5)
print(grid.arrange(grobs = gl, widths = c(5,5),layout_matrix = rbind(c(1,2),c(3,4),c(5,5))))
dev.off()
pdf("/Users/jmattick/Documents/BMalayi.ChromosomeStats.vsSNVs.pdf",useDingbats=FALSE)
Combined.GC.SNV.frame<-GC_Graphing
Combined.GC.SNV.frame$Pi<-BMalayiGraphing$Pi
####GC
g1<-ggplot(Combined.GC.SNV.frame[Combined.GC.SNV.frame$Chr == "Chr1",]) + geom_point(aes(x=GC,y=Pi),size=0.1) + theme_bw() + facet_grid(~Chr,scales="free",space="free") + ggtitle("GC vs SNV Plots: A") + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Percent GC")+scale_x_continuous(expand = c(0,0),limits=c(0,50))+scale_y_continuous(expand = c(0,0))
g2<-ggplot(Combined.GC.SNV.frame[Combined.GC.SNV.frame$Chr == "Chr2",]) + geom_point(aes(x=GC,y=Pi),size=0.1) + theme_bw() + facet_grid(~Chr,scales="free",space="free") + ggtitle("GC vs SNV Plots: B") + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Percent GC")+scale_x_continuous(expand = c(0,0),limits=c(0,50))+scale_y_continuous(expand = c(0,0))
g3<-ggplot(Combined.GC.SNV.frame[Combined.GC.SNV.frame$Chr == "Chr3",]) + geom_point(aes(x=GC,y=Pi),size=0.1) + theme_bw() + facet_grid(~Chr,scales="free",space="free") + ggtitle("GC vs SNV Plots: C") + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Percent GC")+scale_x_continuous(expand = c(0,0),limits=c(0,50))+scale_y_continuous(expand = c(0,0))
g4<-ggplot(Combined.GC.SNV.frame[Combined.GC.SNV.frame$Chr == "Chr4",]) + geom_point(aes(x=GC,y=Pi),size=0.1) + theme_bw() + facet_grid(~Chr,scales="free",space="free") + ggtitle("GC vs SNV Plots: D") + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Percent GC")+scale_x_continuous(expand = c(0,0),limits=c(0,50))+scale_y_continuous(expand = c(0,0))
g5<-ggplot(Combined.GC.SNV.frame[Combined.GC.SNV.frame$Chr == "ChrX",]) + geom_point(aes(x=GC,y=Pi),size=0.1) + theme_bw() + facet_grid(~Chr,scales="free",space="free") + ggtitle("GC vs SNV Plots: E") + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Percent GC")+scale_x_continuous(expand = c(0,0),limits=c(0,50))+scale_y_continuous(expand = c(0,0))
#print(ggarrange(g1,g2,g3,g4,g5,ncol=1,nrow=5,common.legend = TRUE,legend = "bottom"))
gl<-list(g1,g2,g3,g4,g5)
print(grid.arrange(grobs = gl, widths = c(5,5),layout_matrix = rbind(c(1,2),c(3,4),c(5,NA))))
####Repeat Density
g1<-ggplot(Combined.GC.SNV.frame[Combined.GC.SNV.frame$Chr == "Chr1",]) + geom_point(aes(x=Repeats,y=Pi),size=0.1) + theme_bw() + facet_grid(~Chr,scales="free",space="free") + ggtitle("Repeat vs SNV Plots: A") + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Percent Repeats")+scale_x_continuous(expand = c(0,0))+scale_y_continuous(expand = c(0,0))
g2<-ggplot(Combined.GC.SNV.frame[Combined.GC.SNV.frame$Chr == "Chr2",]) + geom_point(aes(x=Repeats,y=Pi),size=0.1) + theme_bw() + facet_grid(~Chr,scales="free",space="free") + ggtitle("Repeat vs SNV Plots: B") + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Percent Repeats")+scale_x_continuous(expand = c(0,0))+scale_y_continuous(expand = c(0,0))
g3<-ggplot(Combined.GC.SNV.frame[Combined.GC.SNV.frame$Chr == "Chr3",]) + geom_point(aes(x=Repeats,y=Pi),size=0.1) + theme_bw() + facet_grid(~Chr,scales="free",space="free") + ggtitle("Repeat vs SNV Plots: C") + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Percent Repeats")+scale_x_continuous(expand = c(0,0))+scale_y_continuous(expand = c(0,0))
g4<-ggplot(Combined.GC.SNV.frame[Combined.GC.SNV.frame$Chr == "Chr4",]) + geom_point(aes(x=Repeats,y=Pi),size=0.1) + theme_bw() + facet_grid(~Chr,scales="free",space="free") + ggtitle("Repeat vs SNV Plots: D") + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Percent Repeats")+scale_x_continuous(expand = c(0,0))+scale_y_continuous(expand = c(0,0))
g5<-ggplot(Combined.GC.SNV.frame[Combined.GC.SNV.frame$Chr == "ChrX",]) + geom_point(aes(x=Repeats,y=Pi),size=0.1) + theme_bw() + facet_grid(~Chr,scales="free",space="free") + ggtitle("Repeat vs SNV Plots: E") + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Percent Repeats")+scale_x_continuous(expand = c(0,0))+scale_y_continuous(expand = c(0,0))
#print(ggarrange(g1,g2,g3,g4,g5,ncol=1,nrow=5,common.legend = TRUE,legend = "bottom"))
gl<-list(g1,g2,g3,g4,g5)
print(grid.arrange(grobs = gl, widths = c(5,5),layout_matrix = rbind(c(1,2),c(3,4),c(5,NA))))
####Repeat Density
g1<-ggplot(Combined.GC.SNV.frame[Combined.GC.SNV.frame$Chr == "Chr1",]) + geom_point(aes(x=Genes,y=Pi),size=0.1) + theme_bw() + facet_grid(~Chr,scales="free",space="free") + ggtitle("Gene vs SNV Plots: A") + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Percent Genes")+scale_x_continuous(expand = c(0,0))+scale_y_continuous(expand = c(0,0))
g2<-ggplot(Combined.GC.SNV.frame[Combined.GC.SNV.frame$Chr == "Chr2",]) + geom_point(aes(x=Genes,y=Pi),size=0.1) + theme_bw() + facet_grid(~Chr,scales="free",space="free") + ggtitle("Gene vs SNV Plots: B") + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Percent Genes")+scale_x_continuous(expand = c(0,0))+scale_y_continuous(expand = c(0,0))
g3<-ggplot(Combined.GC.SNV.frame[Combined.GC.SNV.frame$Chr == "Chr3",]) + geom_point(aes(x=Genes,y=Pi),size=0.1) + theme_bw() + facet_grid(~Chr,scales="free",space="free") + ggtitle("Gene vs SNV Plots: C") + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Percent Genes")+scale_x_continuous(expand = c(0,0))+scale_y_continuous(expand = c(0,0))
g4<-ggplot(Combined.GC.SNV.frame[Combined.GC.SNV.frame$Chr == "Chr4",]) + geom_point(aes(x=Genes,y=Pi),size=0.1) + theme_bw() + facet_grid(~Chr,scales="free",space="free") + ggtitle("Gene vs SNV Plots: D") + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Percent Genes")+scale_x_continuous(expand = c(0,0))+scale_y_continuous(expand = c(0,0))
g5<-ggplot(Combined.GC.SNV.frame[Combined.GC.SNV.frame$Chr == "ChrX",]) + geom_point(aes(x=Genes,y=Pi),size=0.1) + theme_bw() + facet_grid(~Chr,scales="free",space="free") + ggtitle("Gene vs SNV Plots: E") + theme(legend.position="none",plot.title = element_text(size=14,face = "bold"),plot.margin = unit(c(0.25,0.25,0.25,0.25), "cm"))+xlab("Percent Genes")+scale_x_continuous(expand = c(0,0))+scale_y_continuous(expand = c(0,0))
#print(ggarrange(g1,g2,g3,g4,g5,ncol=1,nrow=5,common.legend = TRUE,legend = "bottom"))
gl<-list(g1,g2,g3,g4,g5)