You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
> DamageProfiler Neukamm, J., Peltzer, A., & Nieselt, K. (2020). DamageProfiler: Fast damage pattern calculation for ancient DNA. In Bioinformatics (btab190). doi: [10.1093/bioinformatics/btab190](https://doi.org/10.1093/bioinformatics/btab190). Download: https://github.com/Integrative-Transcriptomics/DamageProfiler
106
+
107
+
> DamageProfiler Neukamm, J., Peltzer, A., & Nieselt, K. (2020). DamageProfiler: Fast damage pattern calculation for ancient DNA. In Bioinformatics (btab190). doi: [10.1093/bioinformatics/btab190](https://doi.org/10.1093/bioinformatics/btab190).
> DePristo M, Banks E, Poplin R, Garimella K, Maguire J, Hartl C, Philippakis A, del Angel G, Rivas MA, Hanna M, McKenna A, Fennell T, Kernytsky A, Sivachenko A, Cibulskis K, Gabriel S, Altshuler D, Daly M. (2011). A framework for variation discovery and genotyping using next-generation DNA sequencing data. Nature Genetics, 43(5), 491–498. doi: [10.1038/ng.806](https://doi.org/10.1038/ng.806).
> Garrison E, Marth G. Haplotype-based variant detection from short-read sequencing. arXiv preprint arXiv:1207.3907 \[q-bio.GN] 2012. doi: [10.48550/arXiv.1207.3907](https://doi.org/10.48550/arXiv.1207.3907).
120
+
121
+
- [BCFtools](https://github.com/samtools/bcftools)
122
+
123
+
> Li H. A statistical framework for SNP calling, mutation discovery, association mapping and population genetical parameter estimation from sequencing data. Bioinformatics (2011) 27(21) 2987-93. doi: [10.1093/bioinformatics/btr509](https://doi.org/10.1093/bioinformatics/btr509).
## Function to check that the snp files are identical
32
+
def check_snp_files(snpf1, snpf2):
    """Raise an error unless the two .snp files have identical contents.

    Args:
        snpf1: Path to the first .snp file.
        snpf2: Path to the second .snp file.

    Raises:
        IOError: If the contents of the two files differ.
    """
    # shallow=False forces a content comparison. With the default
    # (shallow=True), files whose os.stat() signatures (type, size, mtime)
    # match are taken to be equal without their contents being read, so two
    # different .snp files of the same size and mtime would pass the check.
    if not filecmp.cmp(snpf1, snpf2, shallow=False):
        raise IOError("Input .snp files are not identical.")
35
+
36
+
## Function to check the consistency of an eigenstrat database
37
+
def validate_eigenstrat(genof, snpf, indf):
    """Check that a .geno/.snp/.ind file trio has matching dimensions.

    The first dimension of the .geno file must equal the length of the
    .snp file, and its second dimension must equal the length of the
    .ind file.

    NOTE(review): `file_len` and `file_width` are defined elsewhere in this
    file; presumably they return the line count and the per-line width of
    a file -- confirm against their definitions.

    Args:
        genof: Path to the .geno file.
        snpf: Path to the .snp file.
        indf: Path to the .ind file.

    Raises:
        IOError: If the .geno file does not match the .snp file or the
            .ind file.
    """
    dimsGeno = [file_len(genof), file_width(genof)]
    linesSnp = file_len(snpf)
    linesInd = file_len(indf)

    ## Check geno and snp compatibility
    if dimsGeno[0] != linesSnp:
        raise IOError("Input .snp and .geno files do not match.")

    ## Check geno and ind compatibility
    if dimsGeno[1] != linesInd:
        raise IOError("Input .ind and .geno files do not match.")
50
+
51
+
VERSION = "1.0.0"

## Command-line interface. No explicit `usage=` is given so that argparse
## derives the usage line from the arguments actually defined below; the
## previous hand-written usage string listed options (-i, -c, -R, -E, -L, -S)
## that this parser does not accept.
parser = argparse.ArgumentParser(
    description=(
        "A tool to check two different EigenStrat databases for shared "
        "individuals, and extract or remove individuals from an EigenStrat "
        "database."
    ),
)
parser._optionals.title = "Available options"
parser.add_argument(
    "-g1", "--genoFn1", type=str, metavar="<GENO FILE 1 NAME>", required=True,
    help="The path to the input geno file of the first dataset.",
)
parser.add_argument(
    "-s1", "--snpFn1", type=str, metavar="<SNP FILE 1 NAME>", required=True,
    help="The path to the input snp file of the first dataset.",
)
parser.add_argument(
    "-i1", "--indFn1", type=str, metavar="<IND FILE 1 NAME>", required=True,
    help="The path to the input ind file of the first dataset.",
)
parser.add_argument(
    "-g2", "--genoFn2", type=str, metavar="<GENO FILE 2 NAME>", required=True,
    help="The path to the input geno file of the second dataset.",
)
parser.add_argument(
    "-s2", "--snpFn2", type=str, metavar="<SNP FILE 2 NAME>", required=True,
    help="The path to the input snp file of the second dataset.",
)
parser.add_argument(
    "-i2", "--indFn2", type=str, metavar="<IND FILE 2 NAME>", required=True,
    help="The path to the input ind file of the second dataset.",
)
parser.add_argument(
    "-o", "--output", type=str, metavar="<OUTPUT FILES PREFIX>", required=True,
    help=(
        "The desired output file prefix. Three output files are created, "
        "<OUTPUT FILES PREFIX>.geno , <OUTPUT FILES PREFIX>.snp and "
        "<OUTPUT FILES PREFIX>.ind ."
    ),
)
parser.add_argument(
    "-v", "--version", action="version", version=VERSION,
    help="Print the version and exit.",
)
args = parser.parse_args()
64
+
65
+
## Open input files
66
+
## These handles stay open for the code that follows.
## NOTE(review): no matching close() is visible in this part of the file --
## confirm the handles are closed once reading is finished.
GenoFile1 = open(args.genoFn1, "r")
SnpFile1 = open(args.snpFn1, "r")
IndFile1 = open(args.indFn1, "r")

GenoFile2 = open(args.genoFn2, "r")
# SnpFile2 = open(args.snpFn2, "r") ## Never actually read in line by line
params.genotyping_gatk_ug_defaultbasequalities > 0 ? "--defaultBaseQualities ${params.genotyping_gatk_ug_defaultbasequalities}" : "", // Empty string since GATK complains if its default of -1 is provided.
params.genotyping_gatk_ug_defaultbasequalities > 0 ? "--defaultBaseQualities ${params.genotyping_gatk_ug_defaultbasequalities}" : "", // Empty string since GATK complains if its default of -1 is provided.
params.genotyping_gatk_ug_defaultbasequalities > 0 ? "--defaultBaseQualities ${params.genotyping_gatk_ug_defaultbasequalities}" : "", // Empty string since GATK complains if its default of -1 is provided.
0 commit comments