Skip to content

Commit 17db16f

Browse files
authored
Merge pull request #77 from uclahs-cds/nzeltser-fix-missing-multiallelic-bug
fix missing multiallelic bug
2 parents a39e4db + da148ac commit 17db16f

File tree

5 files changed

+33
-16
lines changed

5 files changed

+33
-16
lines changed

NEWS.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,10 @@
33
# ApplyPolygenicScore unreleased
44
## Changed
55
* Fixed regression of combine.vcf.with.pgs() function that prevented it from handling multiple rsIDs on the same line.
6-
* added new contributor
6+
* Fixed bug caused by the case of a sample-specific missing variant at a multiallelic site
7+
8+
## Added
9+
* Added new contributor
710

811
# ApplyPolygenicScore 3.0.2
912

R/handle-multiallelic-sites.R

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,14 @@ get.non.risk.multiallelic.site.row <- function(single.sample.multialellic.pgs.wi
88
return(data.frame());
99
}
1010

11+
# handle cases where this variant is missing in this individual sample
12+
if (all(single.sample.multialellic.pgs.with.vcf.data$gt_GT_alleles == '.') | all(is.na(single.sample.multialellic.pgs.with.vcf.data$gt_GT_alleles))) { #standard importation marks missing genotypes with '.'
13+
# if all alleles are missing (variant not called for this sample), the first entry of the multiallelic site
14+
# is arbitrarily chosen to represent the sample (any choice would be NA regardless)
15+
risk.allele.site.row.index <- 1;
16+
return(single.sample.multialellic.pgs.with.vcf.data[-risk.allele.site.row.index, ]);
17+
}
18+
1119
# split genotyped alleles into separate columns
1220
GT.alleles <- data.table::tstrsplit(single.sample.multialellic.pgs.with.vcf.data$gt_GT_alleles, split = c('/|\\|'), keep = c(1,2));
1321
names(GT.alleles) <- c('called.allele.a', 'called.allele.b');
13 Bytes
Binary file not shown.

tests/testthat/generate-test-data.R

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -132,24 +132,26 @@ save(
132132
# create simple VCF data for testing multiallelic site handling
133133
merged.multiallelic.site.test.data <- list(
134134
merged.multiallelic.vcf.data = data.frame(
135-
CHROM = c('chr1', 'chr1', 'chr1', 'chr2', 'chr2', 'chr2', 'chr3', 'chr3', 'chr3'),
135+
CHROM = c('chr1', 'chr1', 'chr1', 'chr1', 'chr2', 'chr2', 'chr2', 'chr2', 'chr3', 'chr3', 'chr3', 'chr3'),
136136
# merged multiallelic site at chr2:2 with betas provided
137137
# merged multiallelic site at chr3:3 with no betas provided
138-
POS = c(1, 1, 1, 2, 2, 2, 3, 3, 3),
139-
ID = c('rs1', 'rs1', 'rs1', 'rs2', 'rs2', 'rs2', 'rs3', 'rs3', 'rs3'),
140-
REF = c('T', 'T', 'T', 'T', 'T', 'T', 'T', 'T', 'T'),
138+
POS = c(1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3),
139+
ID = c('rs1', 'rs1', 'rs1', 'rs1', 'rs2', 'rs2', 'rs2', 'rs2', 'rs3', 'rs3', 'rs3', 'rs3'),
140+
REF = c('T', 'T', 'T', 'T', 'T', 'T', 'T', 'T', 'T', 'T', 'T', 'T'),
141141
# three possible alleles at chr2:2 (T, A, C)
142-
ALT = c('A', 'A', 'A', 'A,C', 'A,C', 'A,C', 'A,G', 'A,G', 'A,G'),
143-
Indiv = c('sample1', 'sample2', 'sample3', 'sample1', 'sample2', 'sample3', 'sample1', 'sample2', 'sample3'),
142+
ALT = c('A', 'A', 'A', 'A', 'A,C', 'A,C', 'A,C', 'A,C', 'A,G', 'A,G', 'A,G', 'A,G'),
143+
Indiv = c('sample1', 'sample2', 'sample3', 'sample4', 'sample1', 'sample2', 'sample3', 'sample4', 'sample1', 'sample2', 'sample3', 'sample4'),
144144
# multiallelic genotypes at chr2:2
145145
# sample 1 is heterozygous for alt allele A
146146
# sample 2 is heterozygous for alt allele C
147147
# sample 3 is homozygous for alt allele C
148+
# sample 4 is missing this variant call
148149
# multiallelic genotypes at chr3:3
149150
# sample 1 is heterozygous for alt allele A
150151
# sample 2 is homozygous for alt allele G
151152
# sample 3 is homozygous for alt allele G
152-
gt_GT_alleles = c('T/T', 'T/A', 'A/A', 'T/A', 'T/C', 'C/C', 'T/A', 'T/G', 'G/G')
153+
# sample 4 is missing this variant call
154+
gt_GT_alleles = c('T/T', 'T/A', 'A/A', NA, 'T/A', 'T/C', 'C/C', NA, 'T/A', 'T/G', 'G/G', NA)
153155
),
154156
ref.as.single.risk.allele.multiallelic.pgs.weight.data = data.frame(
155157
CHROM = c('chr1', 'chr2', 'chr3'),

tests/testthat/test-pgs-application.R

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -392,21 +392,23 @@ test_that(
392392
# test case with VCF multiallelic site with no extra beta, REF allele is the risk allele
393393
ref.as.single.risk.allele.test <- apply.polygenic.score(
394394
vcf.data = merged.multiallelic.site.test.data$merged.multiallelic.vcf.data,
395-
pgs.weight.data = merged.multiallelic.site.test.data$ref.as.single.risk.allele.multiallelic.pgs.weight.data
395+
pgs.weight.data = merged.multiallelic.site.test.data$ref.as.single.risk.allele.multiallelic.pgs.weight.data,
396+
missing.genotype.method = 'none'
396397
);
397398
expect_equal(
398399
ref.as.single.risk.allele.test[[PGS.OUTPUT.INDEX]]$PGS,
399-
c(4, 3, 0)
400+
c(4, 3, 0, 0)
400401
);
401402

402403
# test case with VCF multiallelic sites with no extra beta, ALT allele is the risk allele
403404
alt.as.single.risk.allele.test <- apply.polygenic.score(
404405
vcf.data = merged.multiallelic.site.test.data$merged.multiallelic.vcf.data,
405-
pgs.weight.data = merged.multiallelic.site.test.data$alt.as.single.risk.allele.multiallelic.pgs.weight.data
406+
pgs.weight.data = merged.multiallelic.site.test.data$alt.as.single.risk.allele.multiallelic.pgs.weight.data,
407+
missing.genotype.method = 'none'
406408
);
407409
expect_equal(
408410
alt.as.single.risk.allele.test[[PGS.OUTPUT.INDEX]]$PGS,
409-
c(2, 1, 2)
411+
c(2, 1, 2, 0)
410412
);
411413
expect_no_warning(
412414
apply.polygenic.score(
@@ -418,11 +420,12 @@ test_that(
418420
# test case with a VCF multiallelic site that also has an extra beta, both ALT alleles are risk alleles
419421
alt.as.two.risk.alleles.test <- apply.polygenic.score(
420422
vcf.data = merged.multiallelic.site.test.data$merged.multiallelic.vcf.data,
421-
pgs.weight.data = merged.multiallelic.site.test.data$alt.as.two.risk.alleles.multiallelic.pgs.weight.data
423+
pgs.weight.data = merged.multiallelic.site.test.data$alt.as.two.risk.alleles.multiallelic.pgs.weight.data,
424+
missing.genotype.method = 'none'
422425
);
423426
expect_equal(
424427
alt.as.two.risk.alleles.test[[PGS.OUTPUT.INDEX]]$PGS,
425-
c(2, 1.5, 3)
428+
c(2, 1.5, 3, 0)
426429
);
427430
expect_warning(
428431
apply.polygenic.score(
@@ -435,11 +438,12 @@ test_that(
435438
# test case with a VCF multiallelic site that also has an extra beta, one REF and one ALT allele are risk alleles
436439
ref.and.alt.as.two.risk.alleles.test <- apply.polygenic.score(
437440
vcf.data = merged.multiallelic.site.test.data$merged.multiallelic.vcf.data,
438-
pgs.weight.data = merged.multiallelic.site.test.data$ref.and.alt.as.two.risk.alelles.multiallelic.pgs.weight.data
441+
pgs.weight.data = merged.multiallelic.site.test.data$ref.and.alt.as.two.risk.alelles.multiallelic.pgs.weight.data,
442+
missing.genotype.method = 'none'
439443
);
440444
expect_equal(
441445
ref.and.alt.as.two.risk.alleles.test[[PGS.OUTPUT.INDEX]]$PGS,
442-
c(2, 1.5, 2)
446+
c(2, 1.5, 2, 0)
443447
);
444448
expect_warning(
445449
apply.polygenic.score(

0 commit comments

Comments
 (0)