Skip to content

Commit a36670b

Browse files
authored
Merge pull request #70 from uclahs-cds/nzeltser-update-strand-flip
small functionality patches to strand flip checks, dosage calculator, automated statistics
2 parents ccc803e + dd9f6ec commit a36670b

File tree

9 files changed

+66
-23
lines changed

9 files changed

+66
-23
lines changed

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: ApplyPolygenicScore
22
Type: Package
33
Title: Utilities for the Application of a Polygenic Score to a VCF
4-
Version: 3.0.0
4+
Version: 3.0.1
55
Authors@R: c(
66
person('Paul', 'Boutros', role = 'cre', email = '[email protected]'),
77
person('Nicole', 'Zeltser', role = 'aut', comment = c(ORCID = '0000-0001-7246-2771')),

NEWS.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,14 @@
11
# Unreleased
22

3+
# ApplyPolygenicScore 3.0.1
4+
5+
## Added
6+
* Added hemizygous allele handling to dosage calculation
7+
8+
## Changed
9+
* Updated INDEL effect switch reporting by strand flip checker
10+
* Updated data structuring for automated statistical analysis in `apply.polygenic.score`
11+
312
# ApplyPolygenicScore 3.0.0 (2024-12-02)
413

514
## Added

R/apply-pgs.R

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -561,9 +561,12 @@ apply.polygenic.score <- function(
561561
### Begin Phenotype Analysis ###
562562

563563
if (!is.null(phenotype.analysis.columns)) {
564+
# post merge data selection
565+
pgs.for.phenotype.stats <- pgs.output[ , missing.method.to.colname.ref[analysis.source.pgs]];
566+
564567
regression.output <- run.pgs.regression(
565-
pgs = pgs.for.stats,
566-
phenotype.data = subset(phenotype.data, select = phenotype.analysis.columns)
568+
pgs = pgs.for.phenotype.stats,
569+
phenotype.data = subset(pgs.output, select = phenotype.analysis.columns)
567570
);
568571
}
569572
### End Phenotype Analysis ###

R/assess-strand-flip.R

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,14 @@ assess.pgs.vcf.allele.match <- function(
216216
# identify insertion/deletion alleles in PGS
217217
if (nchar(current.pgs.ref.allele) > 1 || nchar(current.pgs.effect.allele) > 1) {
218218
pgs.indel <- TRUE;
219+
if (effect.switch.candidate) {
220+
# if an INDEL effect switch is detected, do not continue to strand flip assessment
221+
# and return effect switch designation
222+
flip.designation[i] <- 'effect_switch';
223+
flipped.effect.allele[i] <- current.pgs.effect.allele;
224+
flipped.other.allele[i] <- current.pgs.ref.allele;
225+
next;
226+
}
219227
# no INDEL flipping supported, return either NA or the original allele
220228
warning('Mismatch detected in INDEL PGS allele. Skipping strand flip assessment.');
221229
flip.designation[i] <- 'indel_mismatch';
@@ -234,6 +242,14 @@ assess.pgs.vcf.allele.match <- function(
234242

235243
# identify insertion/deletion alleles in VCF
236244
if (nchar(current.vcf.ref.allele) > 1 || all(nchar(current.vcf.alt.allele.split) > 1)) {
245+
if (effect.switch.candidate) {
246+
# if an INDEL effect switch is detected, do not continue to strand flip assessment
247+
# and return effect switch designation
248+
flip.designation[i] <- 'effect_switch';
249+
flipped.effect.allele[i] <- current.pgs.effect.allele;
250+
flipped.other.allele[i] <- current.pgs.ref.allele;
251+
next;
252+
}
237253
# no INDEL flipping supported, return either NA or the original allele
238254
warning('Mismatch detected in INDEL VCF allele. Skipping strand flip assessment.');
239255
flip.designation[i] <- 'indel_mismatch';

R/calculate-dosage.R

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
#' @description Convert genotype calls in the form of witten out alleles (e.g. 'A/T') to dosages (0, 1, 2) based on provided risk alleles from a PGS.
33
#' @param called.alleles A vector of genotypes in allelic notation separated by a slash or pipe.
44
#' @param risk.alleles A vector of risk alleles from a polygenic score corresponding to each genotype (by locus) in called.alleles.
5-
#' @return A vector of dosages corresponding to each genotype in called.alleles.
5+
#' @return A vector of dosages corresponding to each genotype in called.alleles. Hemizygous genotypes (one allele e.g. 'A') are counted as 1.
66
#' @examples
77
#' called.alleles <- c('A/A', 'A/T', 'T/T');
88
#' risk.alleles <- c('T', 'T', 'T');
@@ -31,12 +31,16 @@ convert.alleles.to.pgs.dosage <- function(called.alleles, risk.alleles) {
3131
} else {
3232
# check that called.alleles is a vector of genotypes in allelic notation or '.' separated by a slash or pipe
3333
# "*" characters represent overlapping deletions from an upstream indel and are accepted VCF format
34-
allowed.pattern <- '^((([A-Z]+|\\.|\\*)[/\\|]([A-Z]+|\\.|\\*))|\\.)$' # '|' are special chars in regular expressions
34+
allowed.pattern <- '^((([A-Z]+|\\.|\\*)[/\\|]([A-Z]+|\\.|\\*))|\\.|[A-Z]+)$' # '|' are special chars in regular expressions
3535
passing.alleles <- grepl(allowed.pattern, called.alleles);
3636
passing.alleles[is.na(called.alleles)] <- TRUE; # NA allowed
3737
if (!all(passing.alleles)) {
3838
stop('unrecognized called.alleles format, must be capitalized letters, "." or "*" separated by a slash or pipe.');
3939
}
40+
# replace hemizygous genotypes with a placeholder for easier splitting
41+
# index for non-NA alleles that are missing allele separators:
42+
no.sep.index <- (!grepl('/|\\|', called.alleles) & !is.na(called.alleles) & called.alleles != '.');
43+
called.alleles[no.sep.index] <- paste0(called.alleles[no.sep.index], '/-');
4044
split.alleles <- data.table::tstrsplit(called.alleles, split = c('/|\\|'), keep = c(1,2)); # '|' are special chars in regular expressions
4145
}
4246
names(split.alleles) <- c('called.allele.a', 'called.allele.b');

R/run-pgs-statistics.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ classify.variable.type <- function(data, continuous.threshold = 4) {
6262
continuous.vars.index <- sapply(
6363
X = data,
6464
FUN = function(x) {
65-
'numeric' == class(x) & continuous.threshold < length(unique(na.omit(x)));
65+
('numeric' == class(x) | 'integer' == class(x)) & continuous.threshold < length(unique(na.omit(x)));
6666
}
6767
);
6868
binary.vars.index <- sapply(

man/convert.alleles.to.pgs.dosage.Rd

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-dosage-calculator.R

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -34,13 +34,6 @@ test_that(
3434
);
3535

3636
# check for correct called.alleles format
37-
expect_error(
38-
convert.alleles.to.pgs.dosage(
39-
called.alleles = c('A/A', 'A'),
40-
risk.alleles = c('A', 'T')
41-
),
42-
'unrecognized called.alleles format, must be capitalized letters, "." or "\\*" separated by a slash or pipe.'
43-
);
4437
expect_error(
4538
convert.alleles.to.pgs.dosage(
4639
called.alleles = c('A/A', 'A,'),
@@ -101,8 +94,8 @@ test_that(
10194
# check that correct input is accepted
10295
expect_silent(
10396
convert.alleles.to.pgs.dosage(
104-
called.alleles = c('A/A', 'A|T', 'TA/T', 'A/ATTTT', './.', '.', '*/T', 'T/*', '*/*'),
105-
risk.alleles = c('A', 'T', 'A', 'T', 'A', 'T', 'A', 'T', 'A')
97+
called.alleles = c('A/A', 'A|T', 'TA/T', 'A/ATTTT', './.', '.', '*/T', 'T/*', '*/*', 'A', 'T'),
98+
risk.alleles = c('A', 'T', 'A', 'T', 'A', 'T', 'A', 'T', 'A', 'T', 'A')
10699
)
107100
);
108101
}
@@ -132,6 +125,18 @@ test_that(
132125
}
133126
);
134127

128+
test_that(
129+
'convert.alleles.to.pgs.dosage calculates dosage correctly from hemizygous genotypes', {
130+
expect_equal(
131+
convert.alleles.to.pgs.dosage(
132+
called.alleles = c('A', 'T'),
133+
risk.alleles = c('A', 'A')
134+
),
135+
c(1, 0)
136+
);
137+
}
138+
);
139+
135140
test_that(
136141
'convert.alleles.to.pgs.dosage calculates dosage correctly from short indel alleles', {
137142
expect_equal(

tests/testthat/test-strand-flip-handling.R

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,12 @@ test_that(
206206
assess.pgs.vcf.allele.match('ATCG', 'A', 'G', 'A'),
207207
'Mismatch detected in INDEL VCF allele. Skipping strand flip assessment.'
208208
);
209+
210+
# indel effect switch case
211+
expect_silent(
212+
assess.pgs.vcf.allele.match('A', 'ATCG', 'ATCG', 'A')
213+
);
214+
209215
# indel but everything matches
210216
expect_silent(
211217
assess.pgs.vcf.allele.match('ATCG', 'A', 'ATCG', 'A')
@@ -214,26 +220,26 @@ test_that(
214220
# check indel handling when return.indels.as.missing == FALSE
215221
# VCF ALT allele is an INDEL
216222
test.leave.indels.vcf.alt <- assess.pgs.vcf.allele.match(
217-
c('A', 'A'),
218-
c('G', 'ATCG'),
219-
c('A', 'A'),
220-
c('G', 'G'),
223+
c('A', 'A', 'A'),
224+
c('G', 'ATCG', 'ATCG'),
225+
c('A', 'A', 'ATCG'),
226+
c('G', 'G', 'A'),
221227
return.indels.as.missing = FALSE
222228
);
223229

224230
expect_equal(
225231
test.leave.indels.vcf.alt$match.status,
226-
c('default_match', 'indel_mismatch')
232+
c('default_match', 'indel_mismatch', 'effect_switch')
227233
);
228234

229235
expect_equal(
230236
test.leave.indels.vcf.alt$new.pgs.effect.allele,
231-
c('G', 'G')
237+
c('G', 'G', 'A')
232238
);
233239

234240
expect_equal(
235241
test.leave.indels.vcf.alt$new.pgs.other.allele,
236-
c('A', 'A')
242+
c('A', 'A', 'ATCG')
237243
);
238244

239245
# VCF REF allele is an INDEL

0 commit comments

Comments
 (0)