Skip to content

Commit 0e0bbdb

Browse files
authored
Merge pull request #82 from uclahs-cds/nzeltser-add-strand-flip-smart-check
add strand flip smart check
2 parents 9b9d7f8 + 04535c5 commit 0e0bbdb

File tree

8 files changed

+154
-16
lines changed

8 files changed

+154
-16
lines changed

NEWS.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
* Added minimum sample size check for grouped density curves
1212
* Added new plotting function `create.pgs.boxplot`
1313
* Added option for user to provide custom PGS source column(s) for plotting functions
14+
* Added option to `assess.pgs.vcf.allele.match` to condition the handling of ambiguous strand flips on the total number of unambiguous strand flips.
1415

1516
# ApplyPolygenicScore 3.0.2
1617

R/apply-pgs.R

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,8 @@ validate.phenotype.data.input <- function(phenotype.data, phenotype.analysis.col
100100
#' The PGS catalog standard column \code{other_allele} in \code{pgs.weight.data} is required for this check.
101101
#' @param remove.ambiguous.allele.matches A logical indicating whether to remove PGS variants with ambiguous allele matches between PGS weight data and VCF genotype data. Default is \code{FALSE}.
102102
#' The PGS catalog standard column \code{other_allele} in \code{pgs.weight.data} is required for this check.
103+
#' @param max.strand.flips An integer indicating the number of unambiguous strand flips that need to be detected in order to discard all variants with ambiguous allele matches. Only applies if {return.ambiguous.as.missing == TRUE}.
104+
#' Default is \code{0} which means that all ambiguous variants are removed regardless of the status of any other variant.
103105
#' @param remove.mismatched.indels A logical indicating whether to remove indel variants that are mismatched between PGS weight data and VCF genotype data. Default is \code{FALSE}.
104106
#' The PGS catalog standard column \code{other_allele} in \code{pgs.weight.data} is required for this check.
105107
#' @param output.dir A character string indicating the directory to write output files. Separate files are written for per-sample pgs results and optional regression results.
@@ -198,6 +200,9 @@ validate.phenotype.data.input <- function(phenotype.data, phenotype.analysis.col
198200
#' \item \code{remove.ambiguous.allele.matches}: Corresponds to the \code{return.ambiguous.as.missing} argument in \code{assess.pgs.vcf.allele.match}. When \code{TRUE}, non-INDEL allele
199201
#' mismatches that cannot be resolved (due to palindromic alleles or causes other than strand flips) are removed by marking the affected value in the \code{effect_allele} column as missing
200202
#' prior to dosage calling and missing genotype handling. The corresponding dosage is set to NA and the variant is handled according to the chosen missing genotype method.
203+
#' \item \code{max.strand.flips}: This argument only applies when \code{remove.ambiguous.allele.matches} is on and modifies its behavior. In cases where none or very few unambiguous strand flips are detected,
204+
#' it is likely that all ambiguous allele matches are simply palindromic effect size flips. This option facilitates handling of ambiguous allele matches conditional on a maximum number of unambiguous strand flips.
205+
#' Variants with ambiguous strand flips will be marked as missing only if the number of unambiguous strand flips is greater than or equal to \code{max.strand.flips}.
201206
#' \item \code{remove.mismatched.indels}: Corresponds to the \code{return.indels.as.missing} argument in \code{assess.pgs.vcf.allele.match}. When \code{TRUE}, INDEL allele mismatches
202207
#' (which cannot be assessed for strand flips) are removed by marking the affected value in the \code{effect_allele} column as missing prior to dosage calling and missing genotype handling.
203208
#' The corresponding dosage is set to NA and the variant is handled according to the chosen missing genotype method.
@@ -280,6 +285,7 @@ apply.polygenic.score <- function(
280285
phenotype.analysis.columns = NULL,
281286
correct.strand.flips = TRUE,
282287
remove.ambiguous.allele.matches = FALSE,
288+
max.strand.flips = 0,
283289
remove.mismatched.indels = FALSE,
284290
output.dir = NULL,
285291
file.prefix = NULL,
@@ -345,12 +351,16 @@ apply.polygenic.score <- function(
345351

346352
### Start Allele Match Check ###
347353
if (remove.ambiguous.allele.matches || correct.strand.flips) {
354+
# adjust max.strand.flips threshold to account for the long-form of the variant x sample matrix
355+
# each allele match appears once per sample, so the threshold is multiplied by the number of samples
356+
adjusted.max.strand.flips <- max.strand.flips * length(unique(merged.vcf.with.pgs.data$Indiv));
348357
match.assessment <- ApplyPolygenicScore::assess.pgs.vcf.allele.match(
349358
vcf.ref.allele = merged.vcf.with.pgs.data$REF,
350359
vcf.alt.allele = merged.vcf.with.pgs.data$ALT,
351360
pgs.ref.allele = merged.vcf.with.pgs.data$other_allele,
352361
pgs.effect.allele = merged.vcf.with.pgs.data$effect_allele,
353362
return.ambiguous.as.missing = remove.ambiguous.allele.matches,
363+
max.strand.flips = adjusted.max.strand.flips,
354364
return.indels.as.missing = remove.mismatched.indels
355365
);
356366
merged.vcf.with.pgs.data$effect_allele <- match.assessment$new.pgs.effect.allele;

R/assess-strand-flip.R

Lines changed: 43 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,8 @@ flip.DNA.allele <- function(alleles, return.indels.as.missing = FALSE) {
7979
#' @param return.indels.as.missing A logical value indicating whether to return NA for INDEL alleles with detected mismatches. Default is \code{FALSE}.
8080
#' @param return.ambiguous.as.missing A logical value indicating whether to return NA for ambiguous cases where both a strand flip and effect switch are possible,
8181
#' or no strand flip is detected and a mismatch cannot be resolved. Default is \code{FALSE}.
82+
#' @param max.strand.flips An integer indicating the number of non-ambiguous strand flips that must be present to implement the discarding all allele matches labeled "ambiguous_flip". Only applies if {return.ambiguous.as.missing == TRUE}.
83+
#' Defaults to \code{0}, meaning that no strand flips are allowed. Allele matches labeled "unresolved_mismatch" are not affected by this parameter.
8284
#' @return A list containing the match assessment, a new PGS effect allele, and a new PGS other allele.
8385
#'
8486
#' \strong{Output Structure}
@@ -88,7 +90,7 @@ flip.DNA.allele <- function(alleles, return.indels.as.missing = FALSE) {
8890
#' \item \code{match.status}: A character vector indicating the match status for each pair of allele pairs. Possible values are \code{default_match}, \code{effect_switch}, \code{strand_flip}, \code{effect_switch_with_strand_flip}, \code{ambiguous_flip}, \code{indel_mismatch}, and \code{unresolved_mismatch}.
8991
#' \item \code{new.pgs.effect.allele}: A character vector of new PGS effect alleles based on the match status. If the match status is \code{default_match}, \code{effect_switch} or \code{missing_allele}, the original PGS effect allele is returned.
9092
#' If the match status is \code{strand_flip} or \code{effect_switch_with_strand_flip} the flipped PGS effect allele is returned. If the match status is \code{ambiguous_flip}, \code{indel_mismatch}, or \code{unresolved_mismatch},
91-
#' the return value is either the original allele or NA as dictated by the \code{return.indels.as.missing} and \code{return.ambiguous.as.missing} parameters.
93+
#' the return value is either the original allele or NA as dictated by the \code{return.indels.as.missing}, \code{return.ambiguous.as.missing}, and \code{max.strand.flips} parameters.
9294
#' \item \code{new.pgs.other.allele}: A character vector of new PGS other alleles based on the match status, following the same logic as \code{new.pgs.effect.allele}.
9395
#' }
9496
#'
@@ -123,7 +125,8 @@ assess.pgs.vcf.allele.match <- function(
123125
pgs.ref.allele,
124126
pgs.effect.allele,
125127
return.indels.as.missing = FALSE,
126-
return.ambiguous.as.missing = FALSE
128+
return.ambiguous.as.missing = FALSE,
129+
max.strand.flips = 0
127130
) {
128131

129132
# check that all inputs are one dimensional character vectors
@@ -144,6 +147,20 @@ assess.pgs.vcf.allele.match <- function(
144147
stop('vcf.ref.allele, vcf.alt.allele, pgs.ref.allele, and pgs.effect.allele must be the same length.');
145148
}
146149

150+
# check that all logical parameters are TRUE or FALSE
151+
if (!is.logical(return.indels.as.missing) || length(return.indels.as.missing) != 1) {
152+
stop('return.indels.as.missing must be a single logical value.');
153+
}
154+
155+
if (!is.logical(return.ambiguous.as.missing) || length(return.ambiguous.as.missing) != 1) {
156+
stop('return.ambiguous.as.missing must be a single logical value.');
157+
}
158+
159+
# check that max.strand.flips is a single integer
160+
if (!is.numeric(max.strand.flips) || length(max.strand.flips) != 1 || max.strand.flips < 0 || max.strand.flips != floor(max.strand.flips)) {
161+
stop('max.strand.flips must be a single non-negative integer.');
162+
}
163+
147164
# verify valid alleles
148165
validate.allele.input(vcf.ref.allele, na.allowed = TRUE);
149166
validate.allele.input(pgs.ref.allele, na.allowed = TRUE);
@@ -283,16 +300,11 @@ assess.pgs.vcf.allele.match <- function(
283300
if (effect.switch.candidate && strand.flip.candidate) {
284301
# not possible to determine whether effect switch or strand flip has occured
285302
# This is an ambiguous case caused by palindromic SNPs
303+
# Default is to assume that this is an effect switch until strand flip threshold is exceeded
286304
flip.designation[i] <- 'ambiguous_flip';
287-
if (return.ambiguous.as.missing) {
288-
flipped.effect.allele[i] <- NA;
289-
flipped.other.allele[i] <- NA;
290-
next;
291-
} else {
292-
flipped.effect.allele[i] <- current.pgs.effect.allele;
293-
flipped.other.allele[i] <- current.pgs.ref.allele;
294-
next;
295-
}
305+
flipped.effect.allele[i] <- current.pgs.effect.allele;
306+
flipped.other.allele[i] <- current.pgs.ref.allele;
307+
next;
296308
} else if (effect.switch.candidate) {
297309
# if this is a clear-cut effect switch, return the default PGS alleles
298310
# apply.polygenic.score automatically handles effect switches during PGS application
@@ -338,6 +350,26 @@ assess.pgs.vcf.allele.match <- function(
338350

339351
}
340352

353+
# implement max strand flip threshold for ambiguous cases
354+
# count number of unambiguous strand flips
355+
if (return.ambiguous.as.missing) {
356+
if (max.strand.flips > 0) {
357+
# if a max strand flips threshold is set, check if it is exceeded
358+
unambiguous.strand.flips <- sum(flip.designation == 'strand_flip' | flip.designation == 'effect_switch_with_strand_flip');
359+
if (unambiguous.strand.flips >= max.strand.flips) {
360+
# if the number of unambiguous strand flips equals or exceeds the threshold, return NA for all ambiguous cases
361+
flipped.effect.allele[flip.designation == 'ambiguous_flip'] <- NA;
362+
flipped.other.allele[flip.designation == 'ambiguous_flip'] <- NA;
363+
}
364+
# if max threshold is not exceeded, return all ambiguous cases with no flips (default)
365+
} else {
366+
# if max.strand.flips is 0 there is no tolerance for ambiguous cases, return all as NA
367+
flipped.effect.allele[flip.designation == 'ambiguous_flip'] <- NA;
368+
flipped.other.allele[flip.designation == 'ambiguous_flip'] <- NA;
369+
}
370+
# If return.ambiguous.as.missing is FALSE, keep all ambiguous cases as is (no flips)
371+
}
372+
341373
return(
342374
list(
343375
match.status = flip.designation,

man/apply.polygenic.score.Rd

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/assess.pgs.vcf.allele.match.Rd

Lines changed: 6 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-pgs-application.R

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -737,6 +737,7 @@ test_that(
737737
test_that(
738738
'apply.polygenic.score correctly handles strand flipping', {
739739
load('data/strand.flip.test.data.Rda');
740+
# no strand flip correction
740741
strand.flip.raw.data <- apply.polygenic.score(
741742
vcf.data = strand.flip.test.data$strand.flip.vcf.data,
742743
pgs.weight.data = strand.flip.test.data$strand.flip.pgs.weight.data,
@@ -754,6 +755,7 @@ test_that(
754755
c(0, 0, 0)
755756
);
756757

758+
# strand flip correction
757759
strand.flip.correct.flips <- apply.polygenic.score(
758760
vcf.data = strand.flip.test.data$strand.flip.vcf.data,
759761
pgs.weight.data = strand.flip.test.data$strand.flip.pgs.weight.data,
@@ -771,6 +773,7 @@ test_that(
771773
c(0, 0, 0)
772774
);
773775

776+
# ambiguous allele matches removed
774777
strand.flip.remove.ambiguous <- apply.polygenic.score(
775778
vcf.data = strand.flip.test.data$strand.flip.vcf.data,
776779
pgs.weight.data = strand.flip.test.data$strand.flip.pgs.weight.data,
@@ -789,6 +792,28 @@ test_that(
789792
c(2, 2, 2)
790793
);
791794

795+
# ambiguous allele matches not removed due to threshold
796+
strand.flip.threshold.ambiguous <- apply.polygenic.score(
797+
vcf.data = strand.flip.test.data$strand.flip.vcf.data,
798+
pgs.weight.data = strand.flip.test.data$strand.flip.pgs.weight.data,
799+
correct.strand.flips = TRUE,
800+
missing.genotype.method = c('none', 'mean.dosage', 'normalize'),
801+
remove.ambiguous.allele.matches = TRUE,
802+
max.strand.flips = 3 # there are two unambiguous strand flips in the test data, setting threshold above that
803+
);
804+
805+
# expect equal to not removing ambiguous allele matches
806+
expect_equal(
807+
strand.flip.threshold.ambiguous$pgs.output$PGS, # dosage is unaffected because unresolved mismatch does not contain an effect allele so dosage is 0
808+
strand.flip.correct.flips$pgs.output$PGS
809+
);
810+
811+
expect_equal(
812+
strand.flip.threshold.ambiguous$pgs.output$n.missing.genotypes,
813+
c(1, 1, 1) # the unresolved mismatch is marked missing
814+
);
815+
816+
# indels removed
792817
strand.flip.remove.indel.mismatches <- apply.polygenic.score(
793818
vcf.data = strand.flip.test.data$strand.flip.vcf.data,
794819
pgs.weight.data = strand.flip.test.data$strand.flip.pgs.weight.data,

0 commit comments

Comments
 (0)