Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@
# ApplyPolygenicScore unreleased
## Changed
* Fixed regression of combine.vcf.with.pgs() function that prevented it from handling multiple rsIDs on the same line.
* added new contributor
* Fixed bug caused by the case of a sample-specific missing variant at a multiallelic site

## Added
* Added new contributor

# ApplyPolygenicScore 3.0.2

Expand Down
8 changes: 8 additions & 0 deletions R/handle-multiallelic-sites.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,14 @@ get.non.risk.multiallelic.site.row <- function(single.sample.multialellic.pgs.wi
return(data.frame());
}

# handle cases where this variant is missing in this individual sample
if (all(single.sample.multialellic.pgs.with.vcf.data$gt_GT_alleles == '.') | all(is.na(single.sample.multialellic.pgs.with.vcf.data$gt_GT_alleles))) { #standard importation marks missing genotypes with '.'
# if all alleles are missing (variant not called for this sample), the first entry of the multiallelic site
# is arbitrarily chosen to represent the sample (any choice would be NA regardless)
risk.allele.site.row.index <- 1;
return(single.sample.multialellic.pgs.with.vcf.data[-risk.allele.site.row.index, ]);
}

# split genotyped alleles into separate columns
GT.alleles <- data.table::tstrsplit(single.sample.multialellic.pgs.with.vcf.data$gt_GT_alleles, split = c('/|\\|'), keep = c(1,2));
names(GT.alleles) <- c('called.allele.a', 'called.allele.b');
Expand Down
Binary file modified tests/testthat/data/merged.multiallelic.site.test.data.Rda
Binary file not shown.
16 changes: 9 additions & 7 deletions tests/testthat/generate-test-data.R
Original file line number Diff line number Diff line change
Expand Up @@ -132,24 +132,26 @@ save(
# create simple VCF data for testing multiallelic site handling
merged.multiallelic.site.test.data <- list(
merged.multiallelic.vcf.data = data.frame(
CHROM = c('chr1', 'chr1', 'chr1', 'chr2', 'chr2', 'chr2', 'chr3', 'chr3', 'chr3'),
CHROM = c('chr1', 'chr1', 'chr1', 'chr1', 'chr2', 'chr2', 'chr2', 'chr2', 'chr3', 'chr3', 'chr3', 'chr3'),
# merged multiallelic site at chr2:2 with betas provided
# merged multiallelic site at chr3:3 with no betas provided
POS = c(1, 1, 1, 2, 2, 2, 3, 3, 3),
ID = c('rs1', 'rs1', 'rs1', 'rs2', 'rs2', 'rs2', 'rs3', 'rs3', 'rs3'),
REF = c('T', 'T', 'T', 'T', 'T', 'T', 'T', 'T', 'T'),
POS = c(1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3),
ID = c('rs1', 'rs1', 'rs1', 'rs1', 'rs2', 'rs2', 'rs2', 'rs2', 'rs3', 'rs3', 'rs3', 'rs3'),
REF = c('T', 'T', 'T', 'T', 'T', 'T', 'T', 'T', 'T', 'T', 'T', 'T'),
# three possible alleles at chr2:2 (T, A, C)
ALT = c('A', 'A', 'A', 'A,C', 'A,C', 'A,C', 'A,G', 'A,G', 'A,G'),
Indiv = c('sample1', 'sample2', 'sample3', 'sample1', 'sample2', 'sample3', 'sample1', 'sample2', 'sample3'),
ALT = c('A', 'A', 'A', 'A', 'A,C', 'A,C', 'A,C', 'A,C', 'A,G', 'A,G', 'A,G', 'A,G'),
Indiv = c('sample1', 'sample2', 'sample3', 'sample4', 'sample1', 'sample2', 'sample3', 'sample4', 'sample1', 'sample2', 'sample3', 'sample4'),
# multiallelic genotypes at chr2:2
# sample 1 is heterozygous for alt allele A
# sample 2 is heterozygous for alt allele C
# sample 3 is homozygous for alt allele C
# sample 4 is missing this variant call
# multiallelic genotypes at chr3:3
# sample 1 is heterozygous for alt allele A
# sample 2 is homozygous for alt allele G
# sample 3 is homozygous for alt allele G
gt_GT_alleles = c('T/T', 'T/A', 'A/A', 'T/A', 'T/C', 'C/C', 'T/A', 'T/G', 'G/G')
# sample 4 is missing this variant call
gt_GT_alleles = c('T/T', 'T/A', 'A/A', NA, 'T/A', 'T/C', 'C/C', NA, 'T/A', 'T/G', 'G/G', NA)
),
ref.as.single.risk.allele.multiallelic.pgs.weight.data = data.frame(
CHROM = c('chr1', 'chr2', 'chr3'),
Expand Down
20 changes: 12 additions & 8 deletions tests/testthat/test-pgs-application.R
Original file line number Diff line number Diff line change
Expand Up @@ -392,21 +392,23 @@ test_that(
# test case with VCF multiallelic site with no extra beta, REF allele is the risk allele
ref.as.single.risk.allele.test <- apply.polygenic.score(
vcf.data = merged.multiallelic.site.test.data$merged.multiallelic.vcf.data,
pgs.weight.data = merged.multiallelic.site.test.data$ref.as.single.risk.allele.multiallelic.pgs.weight.data
pgs.weight.data = merged.multiallelic.site.test.data$ref.as.single.risk.allele.multiallelic.pgs.weight.data,
missing.genotype.method = 'none'
);
expect_equal(
ref.as.single.risk.allele.test[[PGS.OUTPUT.INDEX]]$PGS,
c(4, 3, 0)
c(4, 3, 0, 0)
);

# test case with VCF multiallelic sites with no extra beta, ALT allele is the risk allele
alt.as.single.risk.allele.test <- apply.polygenic.score(
vcf.data = merged.multiallelic.site.test.data$merged.multiallelic.vcf.data,
pgs.weight.data = merged.multiallelic.site.test.data$alt.as.single.risk.allele.multiallelic.pgs.weight.data
pgs.weight.data = merged.multiallelic.site.test.data$alt.as.single.risk.allele.multiallelic.pgs.weight.data,
missing.genotype.method = 'none'
);
expect_equal(
alt.as.single.risk.allele.test[[PGS.OUTPUT.INDEX]]$PGS,
c(2, 1, 2)
c(2, 1, 2, 0)
);
expect_no_warning(
apply.polygenic.score(
Expand All @@ -418,11 +420,12 @@ test_that(
# test case with a VCF multiallelic site that also has an extra beta, both ALT alleles are risk alleles
alt.as.two.risk.alleles.test <- apply.polygenic.score(
vcf.data = merged.multiallelic.site.test.data$merged.multiallelic.vcf.data,
pgs.weight.data = merged.multiallelic.site.test.data$alt.as.two.risk.alleles.multiallelic.pgs.weight.data
pgs.weight.data = merged.multiallelic.site.test.data$alt.as.two.risk.alleles.multiallelic.pgs.weight.data,
missing.genotype.method = 'none'
);
expect_equal(
alt.as.two.risk.alleles.test[[PGS.OUTPUT.INDEX]]$PGS,
c(2, 1.5, 3)
c(2, 1.5, 3, 0)
);
expect_warning(
apply.polygenic.score(
Expand All @@ -435,11 +438,12 @@ test_that(
# test case with a VCF multiallelic site that also has an extra beta, one REF and one ALT allele are risk alleles
ref.and.alt.as.two.risk.alleles.test <- apply.polygenic.score(
vcf.data = merged.multiallelic.site.test.data$merged.multiallelic.vcf.data,
pgs.weight.data = merged.multiallelic.site.test.data$ref.and.alt.as.two.risk.alelles.multiallelic.pgs.weight.data
pgs.weight.data = merged.multiallelic.site.test.data$ref.and.alt.as.two.risk.alelles.multiallelic.pgs.weight.data,
missing.genotype.method = 'none'
);
expect_equal(
ref.and.alt.as.two.risk.alleles.test[[PGS.OUTPUT.INDEX]]$PGS,
c(2, 1.5, 2)
c(2, 1.5, 2, 0)
);
expect_warning(
apply.polygenic.score(
Expand Down