Skip to content

Commit 9491c85

Browse files
authored
Merge pull request #88 from uclahs-cds/nzeltser-document-v4
document v4 features
2 parents d595990 + 996ae68 commit 9491c85

File tree

6 files changed

+275
-70
lines changed

6 files changed

+275
-70
lines changed

R/plot-pgs.R

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -590,6 +590,14 @@ create.pgs.boxplot <- function(
590590
boxplot.colors <- suppressWarnings(BoutrosLab.plotting.general::default.colours(length(levels(pgs.data[ , phenotype]))));
591591
names(boxplot.colors) <- levels(pgs.data[ , phenotype]);
592592
}
593+
594+
# handle stripplot vs boxplot coloring
595+
if (add.stripplot) {
596+
box.colors <- 'transparent';
597+
} else {
598+
box.colors <- boxplot.colors;
599+
}
600+
593601
# plot boxplot
594602
group.yaxis.formatting <- basic.yaxis.formatting;
595603
pgs.boxplots.by.phenotype[[paste0(pgs.column,'_',phenotype)]] <- BoutrosLab.plotting.general::create.boxplot(
@@ -606,8 +614,8 @@ create.pgs.boxplot <- function(
606614
xaxis.cex = xaxes.cex,
607615
yat = group.yaxis.formatting$at,
608616
yaxis.lab = group.yaxis.formatting$axis.lab,
609-
points.col = boxplot.colors[pgs.data[ , phenotype]] # color points by phenotype
610-
#col = boxplot.colors
617+
points.col = boxplot.colors[pgs.data[ , phenotype]], # color points by phenotype
618+
col = box.colors # color boxes by phenotype
611619
);
612620
}
613621
}

R/run-pgs-statistics.R

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -243,9 +243,9 @@ run.pgs.regression <- function(pgs, phenotype.data) {
243243
#' for binary or continuous phenotypes. For continuous phenotypes, it converts them
244244
#' to binary based on a specified cutoff threshold. It calculates and returns AUC,
245245
#' Odds Ratios (OR), and p-values for each PGS. Corresponding ROC curves are plotted automatically.
246-
#' @param data A data frame containing the PGS, phenotype, and covariate columns.
246+
#' @param pgs.data A data frame containing the PGS, phenotype, and covariate columns.
247247
#' @param pgs.columns A character vector specifying the names of the PGS columns
248-
#' in \code{data} to be analyzed. All specified columns must be numeric.
248+
#' in \code{pgs.data} to be analyzed. All specified columns must be numeric.
249249
#' @param phenotype.columns A character vector specifying the names of the phenotype columns in \code{data} to be analyzed. If binary phenotypes are specified, they must be factors with two levels (0 and 1).
250250
#' @param covariate.columns A character vector specifying the names of covariate columns in \code{data} to be included in the regression model. Default is NULL.
251251
#' @param phenotype.type A character string specifying the type of phenotype. Must be either 'continuous' or 'binary'. All provided phenotype columns must match this type.
@@ -320,7 +320,7 @@ run.pgs.regression <- function(pgs, phenotype.data) {
320320
#'
321321
#' @export
322322
analyze.pgs.binary.predictiveness <- function(
323-
data,
323+
pgs.data,
324324
pgs.columns,
325325
phenotype.columns,
326326
covariate.columns = NULL,
@@ -337,22 +337,22 @@ analyze.pgs.binary.predictiveness <- function(
337337
) {
338338
## Validate Inputs ##
339339

340-
if (!is.data.frame(data)) {
341-
stop('`data` must be a data frame.');
340+
if (!is.data.frame(pgs.data)) {
341+
stop('`pgs.data` must be a data frame.');
342342
}
343343

344-
if (!all(pgs.columns %in% names(data))) {
344+
if (!all(pgs.columns %in% names(pgs.data))) {
345345
stop('Not all specified `pgs.columns` found in the data frame.');
346346
}
347-
if (!all(sapply(data[, pgs.columns, drop = FALSE], is.numeric))) {
347+
if (!all(sapply(pgs.data[, pgs.columns, drop = FALSE], is.numeric))) {
348348
stop('All `pgs.columns` must be numeric.');
349349
}
350350

351-
if (!all(phenotype.columns %in% names(data))) {
351+
if (!all(phenotype.columns %in% names(pgs.data))) {
352352
stop(paste0('Not all specified `phenotype.columns` found in the data frame.'));
353353
}
354354

355-
if (!is.null(covariate.columns) && !all(covariate.columns %in% names(data))) {
355+
if (!is.null(covariate.columns) && !all(covariate.columns %in% names(pgs.data))) {
356356
stop('Not all specified `covariate.columns` found in the data frame.');
357357
}
358358

@@ -371,14 +371,14 @@ analyze.pgs.binary.predictiveness <- function(
371371
# Validate phenotype types consistency
372372
for (pheno.col in phenotype.columns) {
373373
if (phenotype.type == 'binary') {
374-
if (!is.factor(data[[pheno.col]]) && !all(unique(na.omit(data[[pheno.col]])) %in% c(0, 1))) {
374+
if (!is.factor(pgs.data[[pheno.col]]) && !all(unique(na.omit(pgs.data[[pheno.col]])) %in% c(0, 1))) {
375375
stop(paste0('Phenotype column \'', pheno.col, '\' is specified as binary but is not a factor or 0/1 numeric. Convert to factor.'));
376376
}
377-
if (is.factor(data[[pheno.col]]) && nlevels(data[[pheno.col]]) != 2) {
377+
if (is.factor(pgs.data[[pheno.col]]) && nlevels(pgs.data[[pheno.col]]) != 2) {
378378
stop(paste0('Binary phenotype column \'', pheno.col, '\' must have exactly two levels.'));
379379
}
380380
} else { # phenotype.type == 'continuous'
381-
if (!is.numeric(data[[pheno.col]])) {
381+
if (!is.numeric(pgs.data[[pheno.col]])) {
382382
stop(paste0('Phenotype column \'', pheno.col, '\' is specified as continuous but is not numeric.'));
383383
}
384384
if (is.null(cutoff.threshold)) {
@@ -415,7 +415,7 @@ analyze.pgs.binary.predictiveness <- function(
415415
for (current.phenotype in phenotype.columns) {
416416
# message(paste0('Processing phenotype: ', current.phenotype));
417417

418-
temp.data <- data; # Work on a fresh copy for each phenotype
418+
temp.data <- pgs.data; # Work on a fresh copy for each phenotype
419419

420420
pheno.var <- temp.data[[current.phenotype]];
421421

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ If you wish to apply a PGS to a cohort, we recommend that genotypes for the whol
9090

9191
4. Create summary plots.
9292

93-
ApplyPolygenicScore comes with several plotting functions designed to operate on the results of `apply.polygenic.score`. Display PGS density curves with `create.pgs.density.plot` and PGS percentile ranks with `create.pgs.rank.plot`. If you provided phenotype data in step 3, you can incorporate categorical data into the density plots and categorical and continuous phenotype data into the rank plots, and use `create.pgs.with.continuous.phenotype.plot` to make scatterplots of your PGS against any continuous phenotype data.
93+
ApplyPolygenicScore comes with several plotting functions designed to operate on the results of `apply.polygenic.score`. Display PGS density curves with `create.pgs.density.plot`, distributions with `create.pgs.boxplot` and PGS percentile ranks with `create.pgs.rank.plot`. If you provided phenotype data in step 3, you can incorporate categorical data into the density plots and categorical and continuous phenotype data into the rank plots, and use `create.pgs.with.continuous.phenotype.plot` to make scatterplots of your PGS against any continuous phenotype data. For more sophisticated downstream analysis, check how well the PGS predicts binary outcomes using `analyze.pgs.binary.predictiveness`.
9494

9595
For more step-by-step instructions, check out our [vignettes](https://CRAN.R-project.org/package=ApplyPolygenicScore).
9696

man/analyze.pgs.binary.predictiveness.Rd

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-pgs-statistics.R

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -267,18 +267,18 @@ test_that(
267267
# Test 1: `data` must be a data frame
268268
expect_error(
269269
analyze.pgs.binary.predictiveness(
270-
data = list(),
270+
pgs.data = list(),
271271
pgs.columns = 'PGS.A',
272272
phenotype.columns = 'Pheno.Binary.01',
273273
phenotype.type = 'binary'
274274
),
275-
'`data` must be a data frame.'
275+
'`pgs.data` must be a data frame.'
276276
);
277277

278278
# Test 2: Not all specified `pgs.columns` found in the data frame
279279
expect_error(
280280
analyze.pgs.binary.predictiveness(
281-
data = test.data,
281+
pgs.data = test.data,
282282
pgs.columns = c('PGS.A', 'NonExistentPGS'),
283283
phenotype.columns = 'Pheno.Binary.01',
284284
phenotype.type = 'binary'
@@ -289,7 +289,7 @@ test_that(
289289
# Test 3: All `pgs.columns` must be numeric
290290
expect_error(
291291
analyze.pgs.binary.predictiveness(
292-
data = test.data,
292+
pgs.data = test.data,
293293
pgs.columns = 'NonNumericPGS',
294294
phenotype.columns = 'Pheno.Binary.01',
295295
phenotype.type = 'binary'
@@ -300,7 +300,7 @@ test_that(
300300
# Test 4: Not all specified `phenotype.columns` found in the data frame
301301
expect_error(
302302
analyze.pgs.binary.predictiveness(
303-
data = test.data,
303+
pgs.data = test.data,
304304
pgs.columns = 'PGS.A',
305305
phenotype.columns = c('Pheno.Binary.01', 'NonExistentPheno'),
306306
phenotype.type = 'binary'
@@ -311,7 +311,7 @@ test_that(
311311
# Test 5: `covariate.columns` not found
312312
expect_error(
313313
analyze.pgs.binary.predictiveness(
314-
data = test.data,
314+
pgs.data = test.data,
315315
pgs.columns = 'PGS.A',
316316
phenotype.columns = 'Pheno.Binary.01',
317317
covariate.columns = 'NonExistentCovariate',
@@ -323,7 +323,7 @@ test_that(
323323
# Test 6: `phenotype.type` is invalid (using match.arg's error)
324324
expect_error(
325325
analyze.pgs.binary.predictiveness(
326-
data = test.data,
326+
pgs.data = test.data,
327327
pgs.columns = 'PGS.A',
328328
phenotype.columns = 'Pheno.Binary.01',
329329
phenotype.type = 'invalid_type'
@@ -339,7 +339,7 @@ test_that(
339339
# Test 7a: Binary phenotype column is a factor but not 2 levels
340340
expect_error(
341341
analyze.pgs.binary.predictiveness(
342-
data = test.data,
342+
pgs.data = test.data,
343343
pgs.columns = 'PGS.A',
344344
phenotype.columns = 'Pheno.Binary.3Levels',
345345
phenotype.type = 'binary'
@@ -351,7 +351,7 @@ test_that(
351351
# Pheno.Binary.Char.YesNo is character, not 0/1 numeric
352352
expect_error(
353353
analyze.pgs.binary.predictiveness(
354-
data = test.data,
354+
pgs.data = test.data,
355355
pgs.columns = 'PGS.A',
356356
phenotype.columns = 'Pheno.Binary.Char.YesNo',
357357
phenotype.type = 'binary'
@@ -362,7 +362,7 @@ test_that(
362362
# Test 7c: Numeric binary phenotype is automatically converted to factor.
363363
expect_warning( # Expect no error or warning for valid conversion
364364
analyze.pgs.binary.predictiveness(
365-
data = test.data,
365+
pgs.data = test.data,
366366
pgs.columns = 'PGS.A',
367367
phenotype.columns = 'Pheno.Binary.Numeric.01',
368368
phenotype.type = 'binary'
@@ -373,7 +373,7 @@ test_that(
373373
# Test 7d: Binary phenotype that converts successfully (no error expected, but good for coverage)
374374
expect_silent( # Expect no error or warning for valid conversion
375375
analyze.pgs.binary.predictiveness(
376-
data = test.data,
376+
pgs.data = test.data,
377377
pgs.columns = 'PGS.A',
378378
phenotype.columns = 'Pheno.Binary.01',
379379
phenotype.type = 'binary'
@@ -390,7 +390,7 @@ test_that(
390390
test.data.bad.continuous$Pheno.Continuous.Num <- as.character(test.data.bad.continuous$Pheno.Continuous.Num);
391391
expect_error(
392392
analyze.pgs.binary.predictiveness(
393-
data = test.data.bad.continuous,
393+
pgs.data = test.data.bad.continuous,
394394
pgs.columns = 'PGS.A',
395395
phenotype.columns = 'Pheno.Continuous.Num',
396396
phenotype.type = 'continuous',
@@ -402,7 +402,7 @@ test_that(
402402
# Test 9: `cutoff.threshold` missing for continuous phenotype
403403
expect_error(
404404
analyze.pgs.binary.predictiveness(
405-
data = test.data,
405+
pgs.data = test.data,
406406
pgs.columns = 'PGS.A',
407407
phenotype.columns = 'Pheno.Continuous.Num',
408408
phenotype.type = 'continuous',
@@ -414,7 +414,7 @@ test_that(
414414
# Test 10: `cutoff.threshold` is a list but missing entry for specific phenotype
415415
expect_error(
416416
analyze.pgs.binary.predictiveness(
417-
data = test.data,
417+
pgs.data = test.data,
418418
pgs.columns = 'PGS.A',
419419
phenotype.columns = 'Pheno.Continuous.Num',
420420
phenotype.type = 'continuous',
@@ -426,7 +426,7 @@ test_that(
426426
# Test 11: `cutoff.threshold` is not numeric or a named list
427427
expect_error(
428428
analyze.pgs.binary.predictiveness(
429-
data = test.data,
429+
pgs.data = test.data,
430430
pgs.columns = 'PGS.A',
431431
phenotype.columns = 'Pheno.Continuous.Num',
432432
phenotype.type = 'continuous',
@@ -438,7 +438,7 @@ test_that(
438438
# Test 12: `cutoff.threshold` is a list but value for specific phenotype is NULL/not numeric
439439
expect_error(
440440
analyze.pgs.binary.predictiveness(
441-
data = test.data,
441+
pgs.data = test.data,
442442
pgs.columns = 'PGS.A',
443443
phenotype.columns = 'Pheno.Continuous.Num',
444444
phenotype.type = 'continuous',
@@ -465,7 +465,7 @@ test_that(
465465
}
466466
expect_error(
467467
analyze.pgs.binary.predictiveness(
468-
data = many.pgs.data,
468+
pgs.data = many.pgs.data,
469469
pgs.columns = paste0('PGS', 1:13),
470470
phenotype.columns = 'Pheno.Binary.01',
471471
phenotype.type = 'binary',
@@ -487,7 +487,7 @@ test_that(
487487
);
488488
expect_warning(
489489
results <- analyze.pgs.binary.predictiveness(
490-
data = test.data.no.complete.cases,
490+
pgs.data = test.data.no.complete.cases,
491491
pgs.columns = 'PGS.A',
492492
phenotype.columns = 'Pheno.Binary.01',
493493
phenotype.type = 'binary',
@@ -506,7 +506,7 @@ test_that(
506506
);
507507
expect_warning(
508508
results <- analyze.pgs.binary.predictiveness(
509-
data = test.data.bad.glm,
509+
pgs.data = test.data.bad.glm,
510510
pgs.columns = 'PGS.A',
511511
phenotype.columns = 'Pheno.Binary.01',
512512
phenotype.type = 'binary',
@@ -526,7 +526,7 @@ test_that(
526526
);
527527
expect_warning(
528528
results <- analyze.pgs.binary.predictiveness(
529-
data = test.data.bad.roc,
529+
pgs.data = test.data.bad.roc,
530530
pgs.columns = 'PGS.A',
531531
phenotype.columns = 'Pheno.Binary.01',
532532
phenotype.type = 'binary',
@@ -550,7 +550,7 @@ test_that(
550550

551551
# Scenario 1: Binary phenotypes, with covariates, return plot object
552552
results.binary <- analyze.pgs.binary.predictiveness(
553-
data = test.data, # Using the global test.data
553+
pgs.data = test.data, # Using the global test.data
554554
pgs.columns = c('PGS.A', 'PGS.B'),
555555
phenotype.columns = c('Pheno.Binary.01', 'Pheno.Binary.TF'),
556556
covariate.columns = c('Cov.Age', 'Cov.Sex'),
@@ -596,7 +596,7 @@ test_that(
596596
# Scenario 2: Continuous phenotype, no covariates, save to file (roc.plot should be NULL in return)
597597
# Use existing test.data
598598
results.continuous <- analyze.pgs.binary.predictiveness(
599-
data = test.data, # Using the global test.data
599+
pgs.data = test.data, # Using the global test.data
600600
pgs.columns = 'PGS.A',
601601
phenotype.columns = 'Pheno.Continuous.Num',
602602
covariate.columns = NULL,

0 commit comments

Comments
 (0)