Skip to content
12 changes: 10 additions & 2 deletions R/plot-pgs.R
Original file line number Diff line number Diff line change
Expand Up @@ -590,6 +590,14 @@ create.pgs.boxplot <- function(
boxplot.colors <- suppressWarnings(BoutrosLab.plotting.general::default.colours(length(levels(pgs.data[ , phenotype]))));
names(boxplot.colors) <- levels(pgs.data[ , phenotype]);
}

# handle stripplot vs boxplot coloring
if (add.stripplot) {
box.colors <- 'transparent';
} else {
box.colors <- boxplot.colors;
}

# plot boxplot
group.yaxis.formatting <- basic.yaxis.formatting;
pgs.boxplots.by.phenotype[[paste0(pgs.column,'_',phenotype)]] <- BoutrosLab.plotting.general::create.boxplot(
Expand All @@ -606,8 +614,8 @@ create.pgs.boxplot <- function(
xaxis.cex = xaxes.cex,
yat = group.yaxis.formatting$at,
yaxis.lab = group.yaxis.formatting$axis.lab,
points.col = boxplot.colors[pgs.data[ , phenotype]] # color points by phenotype
#col = boxplot.colors
points.col = boxplot.colors[pgs.data[ , phenotype]], # color points by phenotype
col = box.colors # color boxes by phenotype
);
}
}
Expand Down
24 changes: 12 additions & 12 deletions R/run-pgs-statistics.R
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ run.pgs.regression <- function(pgs, phenotype.data) {
#' for binary or continuous phenotypes. For continuous phenotypes, it converts them
#' to binary based on a specified cutoff threshold. It calculates and returns AUC,
#' Odds Ratios (OR), and p-values for each PGS. Corresponding ROC curves are plotted automatically.
#' @param data A data frame containing the PGS, phenotype, and covariate columns.
#' @param pgs.data A data frame containing the PGS, phenotype, and covariate columns.
#' @param pgs.columns A character vector specifying the names of the PGS columns
#' in \code{data} to be analyzed. All specified columns must be numeric.
#' @param phenotype.columns A character vector specifying the names of the phenotype columns in \code{data} to be analyzed. If binary phenotypes are specified, they must be factors with two levels (0 and 1).
Expand Down Expand Up @@ -320,7 +320,7 @@ run.pgs.regression <- function(pgs, phenotype.data) {
#'
#' @export
analyze.pgs.binary.predictiveness <- function(
data,
pgs.data,
pgs.columns,
phenotype.columns,
covariate.columns = NULL,
Expand All @@ -337,22 +337,22 @@ analyze.pgs.binary.predictiveness <- function(
) {
## Validate Inputs ##

if (!is.data.frame(data)) {
stop('`data` must be a data frame.');
if (!is.data.frame(pgs.data)) {
stop('`pgs.data` must be a data frame.');
}

if (!all(pgs.columns %in% names(data))) {
if (!all(pgs.columns %in% names(pgs.data))) {
stop('Not all specified `pgs.columns` found in the data frame.');
}
if (!all(sapply(data[, pgs.columns, drop = FALSE], is.numeric))) {
if (!all(sapply(pgs.data[, pgs.columns, drop = FALSE], is.numeric))) {
stop('All `pgs.columns` must be numeric.');
}

if (!all(phenotype.columns %in% names(data))) {
if (!all(phenotype.columns %in% names(pgs.data))) {
stop(paste0('Not all specified `phenotype.columns` found in the data frame.'));
}

if (!is.null(covariate.columns) && !all(covariate.columns %in% names(data))) {
if (!is.null(covariate.columns) && !all(covariate.columns %in% names(pgs.data))) {
stop('Not all specified `covariate.columns` found in the data frame.');
}

Expand All @@ -371,14 +371,14 @@ analyze.pgs.binary.predictiveness <- function(
# Validate phenotype types consistency
for (pheno.col in phenotype.columns) {
if (phenotype.type == 'binary') {
if (!is.factor(data[[pheno.col]]) && !all(unique(na.omit(data[[pheno.col]])) %in% c(0, 1))) {
if (!is.factor(pgs.data[[pheno.col]]) && !all(unique(na.omit(pgs.data[[pheno.col]])) %in% c(0, 1))) {
stop(paste0('Phenotype column \'', pheno.col, '\' is specified as binary but is not a factor or 0/1 numeric. Convert to factor.'));
}
if (is.factor(data[[pheno.col]]) && nlevels(data[[pheno.col]]) != 2) {
if (is.factor(pgs.data[[pheno.col]]) && nlevels(pgs.data[[pheno.col]]) != 2) {
stop(paste0('Binary phenotype column \'', pheno.col, '\' must have exactly two levels.'));
}
} else { # phenotype.type == 'continuous'
if (!is.numeric(data[[pheno.col]])) {
if (!is.numeric(pgs.data[[pheno.col]])) {
stop(paste0('Phenotype column \'', pheno.col, '\' is specified as continuous but is not numeric.'));
}
if (is.null(cutoff.threshold)) {
Expand Down Expand Up @@ -415,7 +415,7 @@ analyze.pgs.binary.predictiveness <- function(
for (current.phenotype in phenotype.columns) {
# message(paste0('Processing phenotype: ', current.phenotype));

temp.data <- data; # Work on a fresh copy for each phenotype
temp.data <- pgs.data; # Work on a fresh copy for each phenotype

pheno.var <- temp.data[[current.phenotype]];

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ If you wish to apply a PGS to a cohort, we recommend that genotypes for the whol

4. Create summary plots.

ApplyPolygenicScore comes with several plotting functions designed to operate on the results of `apply.polygenic.score`. Display PGS density curves with `create.pgs.density.plot` and PGS percentile ranks with `create.pgs.rank.plot`. If you provided phenotype data in step 3, you can incorporate categorical data into the density plots and categorical and continuous phenotype data into the rank plots, and use `create.pgs.with.continuous.phenotype.plot` to make scatterplots of your PGS against any continuous phenotype data.
ApplyPolygenicScore comes with several plotting functions designed to operate on the results of `apply.polygenic.score`. Display PGS density curves with `create.pgs.density.plot`, distributions with `create.pgs.boxplot` and PGS percentile ranks with `create.pgs.rank.plot`. If you provided phenotype data in step 3, you can incorporate categorical data into the density plots and categorical and continuous phenotype data into the rank plots, and use `create.pgs.with.continuous.phenotype.plot` to make scatterplots of your PGS against any continuous phenotype data. For more sophisticated downstream analysis, check how well the PGS predicts binary outcomes using `analyze.pgs.binary.predictiveness`.

For more step-by-step instructions, check out our [vignettes](https://CRAN.R-project.org/package=ApplyPolygenicScore).

Expand Down
4 changes: 2 additions & 2 deletions man/analyze.pgs.binary.predictiveness.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

44 changes: 22 additions & 22 deletions tests/testthat/test-pgs-statistics.R
Original file line number Diff line number Diff line change
Expand Up @@ -267,18 +267,18 @@ test_that(
# Test 1: `data` must be a data frame
expect_error(
analyze.pgs.binary.predictiveness(
data = list(),
pgs.data = list(),
pgs.columns = 'PGS.A',
phenotype.columns = 'Pheno.Binary.01',
phenotype.type = 'binary'
),
'`data` must be a data frame.'
'`pgs.data` must be a data frame.'
);

# Test 2: Not all specified `pgs.columns` found in the data frame
expect_error(
analyze.pgs.binary.predictiveness(
data = test.data,
pgs.data = test.data,
pgs.columns = c('PGS.A', 'NonExistentPGS'),
phenotype.columns = 'Pheno.Binary.01',
phenotype.type = 'binary'
Expand All @@ -289,7 +289,7 @@ test_that(
# Test 3: All `pgs.columns` must be numeric
expect_error(
analyze.pgs.binary.predictiveness(
data = test.data,
pgs.data = test.data,
pgs.columns = 'NonNumericPGS',
phenotype.columns = 'Pheno.Binary.01',
phenotype.type = 'binary'
Expand All @@ -300,7 +300,7 @@ test_that(
# Test 4: Not all specified `phenotype.columns` found in the data frame
expect_error(
analyze.pgs.binary.predictiveness(
data = test.data,
pgs.data = test.data,
pgs.columns = 'PGS.A',
phenotype.columns = c('Pheno.Binary.01', 'NonExistentPheno'),
phenotype.type = 'binary'
Expand All @@ -311,7 +311,7 @@ test_that(
# Test 5: `covariate.columns` not found
expect_error(
analyze.pgs.binary.predictiveness(
data = test.data,
pgs.data = test.data,
pgs.columns = 'PGS.A',
phenotype.columns = 'Pheno.Binary.01',
covariate.columns = 'NonExistentCovariate',
Expand All @@ -323,7 +323,7 @@ test_that(
# Test 6: `phenotype.type` is invalid (using match.arg's error)
expect_error(
analyze.pgs.binary.predictiveness(
data = test.data,
pgs.data = test.data,
pgs.columns = 'PGS.A',
phenotype.columns = 'Pheno.Binary.01',
phenotype.type = 'invalid_type'
Expand All @@ -339,7 +339,7 @@ test_that(
# Test 7a: Binary phenotype column is a factor but not 2 levels
expect_error(
analyze.pgs.binary.predictiveness(
data = test.data,
pgs.data = test.data,
pgs.columns = 'PGS.A',
phenotype.columns = 'Pheno.Binary.3Levels',
phenotype.type = 'binary'
Expand All @@ -351,7 +351,7 @@ test_that(
# Pheno.Binary.Char.YesNo is character, not 0/1 numeric
expect_error(
analyze.pgs.binary.predictiveness(
data = test.data,
pgs.data = test.data,
pgs.columns = 'PGS.A',
phenotype.columns = 'Pheno.Binary.Char.YesNo',
phenotype.type = 'binary'
Expand All @@ -362,7 +362,7 @@ test_that(
# Test 7c: Numeric binary phenotype is automatically converted to factor.
expect_warning( # Expect no error or warning for valid conversion
analyze.pgs.binary.predictiveness(
data = test.data,
pgs.data = test.data,
pgs.columns = 'PGS.A',
phenotype.columns = 'Pheno.Binary.Numeric.01',
phenotype.type = 'binary'
Expand All @@ -373,7 +373,7 @@ test_that(
# Test 7d: Binary phenotype that converts successfully (no error expected, but good for coverage)
expect_silent( # Expect no error or warning for valid conversion
analyze.pgs.binary.predictiveness(
data = test.data,
pgs.data = test.data,
pgs.columns = 'PGS.A',
phenotype.columns = 'Pheno.Binary.01',
phenotype.type = 'binary'
Expand All @@ -390,7 +390,7 @@ test_that(
test.data.bad.continuous$Pheno.Continuous.Num <- as.character(test.data.bad.continuous$Pheno.Continuous.Num);
expect_error(
analyze.pgs.binary.predictiveness(
data = test.data.bad.continuous,
pgs.data = test.data.bad.continuous,
pgs.columns = 'PGS.A',
phenotype.columns = 'Pheno.Continuous.Num',
phenotype.type = 'continuous',
Expand All @@ -402,7 +402,7 @@ test_that(
# Test 9: `cutoff.threshold` missing for continuous phenotype
expect_error(
analyze.pgs.binary.predictiveness(
data = test.data,
pgs.data = test.data,
pgs.columns = 'PGS.A',
phenotype.columns = 'Pheno.Continuous.Num',
phenotype.type = 'continuous',
Expand All @@ -414,7 +414,7 @@ test_that(
# Test 10: `cutoff.threshold` is a list but missing entry for specific phenotype
expect_error(
analyze.pgs.binary.predictiveness(
data = test.data,
pgs.data = test.data,
pgs.columns = 'PGS.A',
phenotype.columns = 'Pheno.Continuous.Num',
phenotype.type = 'continuous',
Expand All @@ -426,7 +426,7 @@ test_that(
# Test 11: `cutoff.threshold` is not numeric or a named list
expect_error(
analyze.pgs.binary.predictiveness(
data = test.data,
pgs.data = test.data,
pgs.columns = 'PGS.A',
phenotype.columns = 'Pheno.Continuous.Num',
phenotype.type = 'continuous',
Expand All @@ -438,7 +438,7 @@ test_that(
# Test 12: `cutoff.threshold` is a list but value for specific phenotype is NULL/not numeric
expect_error(
analyze.pgs.binary.predictiveness(
data = test.data,
pgs.data = test.data,
pgs.columns = 'PGS.A',
phenotype.columns = 'Pheno.Continuous.Num',
phenotype.type = 'continuous',
Expand All @@ -465,7 +465,7 @@ test_that(
}
expect_error(
analyze.pgs.binary.predictiveness(
data = many.pgs.data,
pgs.data = many.pgs.data,
pgs.columns = paste0('PGS', 1:13),
phenotype.columns = 'Pheno.Binary.01',
phenotype.type = 'binary',
Expand All @@ -487,7 +487,7 @@ test_that(
);
expect_warning(
results <- analyze.pgs.binary.predictiveness(
data = test.data.no.complete.cases,
pgs.data = test.data.no.complete.cases,
pgs.columns = 'PGS.A',
phenotype.columns = 'Pheno.Binary.01',
phenotype.type = 'binary',
Expand All @@ -506,7 +506,7 @@ test_that(
);
expect_warning(
results <- analyze.pgs.binary.predictiveness(
data = test.data.bad.glm,
pgs.data = test.data.bad.glm,
pgs.columns = 'PGS.A',
phenotype.columns = 'Pheno.Binary.01',
phenotype.type = 'binary',
Expand All @@ -526,7 +526,7 @@ test_that(
);
expect_warning(
results <- analyze.pgs.binary.predictiveness(
data = test.data.bad.roc,
pgs.data = test.data.bad.roc,
pgs.columns = 'PGS.A',
phenotype.columns = 'Pheno.Binary.01',
phenotype.type = 'binary',
Expand All @@ -550,7 +550,7 @@ test_that(

# Scenario 1: Binary phenotypes, with covariates, return plot object
results.binary <- analyze.pgs.binary.predictiveness(
data = test.data, # Using the global test.data
pgs.data = test.data, # Using the global test.data
pgs.columns = c('PGS.A', 'PGS.B'),
phenotype.columns = c('Pheno.Binary.01', 'Pheno.Binary.TF'),
covariate.columns = c('Cov.Age', 'Cov.Sex'),
Expand Down Expand Up @@ -596,7 +596,7 @@ test_that(
# Scenario 2: Continuous phenotype, no covariates, save to file (roc.plot should be NULL in return)
# Use existing test.data
results.continuous <- analyze.pgs.binary.predictiveness(
data = test.data, # Using the global test.data
pgs.data = test.data, # Using the global test.data
pgs.columns = 'PGS.A',
phenotype.columns = 'Pheno.Continuous.Num',
covariate.columns = NULL,
Expand Down
Loading
Loading