-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathtest-modify-vcf.R
126 lines (112 loc) · 4.1 KB
/
test-modify-vcf.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
library(testthat)
test_that("modify the genotypes", {
## skip_on_os(c("windows"), arch = NULL)
outvcf <- paste0(tempfile(), ".vcf.gz")
bw <- vcfwriter$new(outvcf, "VCF4.3")
bw$addContig("chr20")
bw$addINFO("AF", "A", "Float", "Estimated allele frequency in the range (0,1)");
bw$addFORMAT("GT", "1", "String", "Genotype");
bw$addSample("NA12878")
bw$addSample("NA12879")
s1 <- "chr20\t2006060\trs146931526\tG\tC\t100\tPASS\tAF=0.000998403\tGT\t1|0\t1/1"
bw$writeline(s1)
bw$close()
## tests
br <- vcfreader$new(outvcf)
br$variant() ## get a variant record
g0 <- br$genotypes(F)
## if you wanna change the phasing of genotypes,
## call setPhasing before setGenotypes
br$setPhasing(c(1L, 1L))
g1 <- c(1L, 1L, NA, 0L)
br$setGenotypes(g1)
g2 <- br$genotypes(F)
expect_false(isTRUE(all.equal(g0, g2)))
expect_identical(g1, g2)
## the original vcf can not be modified
br <- vcfreader$new(outvcf)
br$variant() ## get a variant record
g3 <- br$genotypes(F)
expect_identical(g0, g3)
})
test_that("modify item in FORMAT for all samples", {
## skip_on_os(c("windows"), arch = NULL)
## creat a VCF with GP in FORMAT
outvcf <- paste0(tempfile(), ".vcf.gz")
bw <- vcfwriter$new(outvcf, "VCF4.3")
bw$addContig("chr20")
bw$addINFO("AF", "A", "Float", "Estimated allele frequency in the range (0,1)");
bw$addFORMAT("GP", "3", "Float", "Posterior genotype probability of 0/0, 0/1, and 1/1");
bw$addSample("NA12878")
bw$addSample("NA12879")
s1 <- "chr20\t2006060\trs146931526\tG\tC\t100\tPASS\tAF=0.000998403\tGP\t0.966,0.034,0\t0.003,0.872,0.125"
bw$writeline(s1)
bw$close()
## tests
br <- vcfreader$new(outvcf)
br$variant() ## get a variant record
br$string()
gp <- br$formatFloat("GP")
gp <- array(gp, c(3, br$nsamples()))
ds <- gp[2,] + gp[3,] * 2
## will prompt uses a message if `output` is not called
br$addFORMAT("DS", "1", "Float", "Diploid dosage")
## now open another file for output
newvcf <- paste0(tempfile(), ".vcf.gz")
br$output(newvcf)
## add INFO, DS in header first
br$addINFO("INFO", "1", "Float", "INFO score of imputation")
br$addFORMAT("DS", "1", "Float", "Diploid dosage")
br$addFORMAT("AC", "1", "Integer", "Allele counts")
br$addFORMAT("STR", "1", "String", "Test String type")
## print(br$header())
## set DS in FORMAT now
br$setFormatFloat("DS", ds)
## test if DS presents
expect_identical(br$formatFloat("DS"), ds)
## more tests
br$setFormatInt("AC", c(3L, 4L))
expect_false(br$setFormatStr("STR","HHH,JJJ")) ## length(s) %% nsamples != 0
expect_true(br$setFormatStr("STR","HHHJJJ")) ## length(s) %% nsamples == 0
## print(br$string())
})
test_that("modify item in FORMAT for specific sample", {
## skip_on_os(c("windows"), arch = NULL)
## creat a VCF with GP in FORMAT
outvcf <- paste0(tempfile(), ".vcf.gz")
bw <- vcfwriter$new(outvcf, "VCF4.3")
bw$addContig("chr20")
bw$addINFO("AF", "A", "Float", "Estimated allele frequency in the range (0,1)");
bw$addFORMAT("GP", "3", "Float", "Posterior genotype probability of 0/0, 0/1, and 1/1");
bw$addSample("NA12878")
bw$addSample("NA12879")
s1 <- "chr20\t2006060\trs146931526\tG\tC\t100\tPASS\tAF=0.000998403\tGP\t0.966,0.034,0\t0.003,0.872,0.125"
bw$writeline(s1)
bw$close()
## tests
br <- vcfreader$new(outvcf, region = "", samples = "NA12878")
br$variant() ## get a variant record
br$string()
br$samples()
gp <- br$formatFloat("GP")
gp <- array(gp, c(3, br$nsamples()))
ds <- gp[2,] + gp[3,] * 2
## now open another file for output
newvcf <- paste0(tempfile(), ".vcf.gz")
br$output(newvcf)
## add INFO, DS in header first
br$addINFO("INFO", "1", "Float", "INFO score of imputation")
br$addFORMAT("DS", "1", "Float", "Diploid dosage")
br$addFORMAT("AC", "1", "Integer", "Allele counts")
br$addFORMAT("STR", "1", "String", "Test String type")
## print(br$header())
## set DS in FORMAT now
br$setFormatFloat("DS", ds[1])
## test if DS presents
expect_identical(br$formatFloat("DS"), ds[1])
br$string()
br$write()
br$close()
vcf <- vcftable(newvcf, format = "DS")
expect_true(vcf$DS==ds[1])
})