|
| 1 | +#' Calculate MAD Outlier Map |
| 2 | +#' |
| 3 | +#' Calculate the median absolute deviation (statistical) outliers measurements |
| 4 | +#' and fold-change criteria from an ADAT. Two values are required for the |
| 5 | +#' calculation: median absolute deviation (MAD) and fold-change (FC). Outliers |
| 6 | +#' are determined based on the result of _both_ `6*MAD` and `x*FC` , where `x` |
| 7 | +#' is the number of fold changes defined. |
| 8 | +#' |
| 9 | +#' For the S3 plotting method, see [plot.Map()]. |
| 10 | +#' |
| 11 | +#' @family Calc Map |
| 12 | +#' @param data A `soma_adat` object containing RFU feature data. |
| 13 | +#' @param anno_tbl An annotations table produced via [getAnalyteInfo()]. |
| 14 | +#' Used to calculate analyte dilutions for the matrix column ordering. |
| 15 | +#' If `NULL`, a table is generated internally from `data` (if possible), and |
| 16 | +#' the analytes are plotted in dilution order. |
| 17 | +#' @param apt.order Character. How should the columns/features be ordered? |
| 18 | +#' Options include: by dilution mix ("dilution"), by median overall signal |
| 19 | +#' ("signal"), or as-is in `data` (default). |
| 20 | +#' @param sample.order Either a character string indicating the column name |
| 21 | +#' with entries to be used to order the data frame rows, or a numeric vector |
| 22 | +#' representing the order of the data frame rows. The |
| 23 | +#' default (`NULL`) leaves the row ordering as it is in `data`. |
| 24 | +#' @param fc.crit Integer. The fold change criterion to evaluate. Defaults to 5x. |
| 25 | +#' @return A list of class `c("outlier_map", "Map")` containing: |
| 26 | +#' \item{matrix}{A boolean matrix of `TRUE/FALSE` whether each sample is an |
| 27 | +#' outlier according the the stated criteria.} |
| 28 | +#' \item{x.lab}{A character string containing the plot x-axis label.} |
| 29 | +#' \item{title}{A character string containing the plot title.} |
| 30 | +#' \item{rows.by.freq}{A logical indicating if the samples are ordered |
| 31 | +#' by outlier frequency.} |
| 32 | +#' \item{class.tab}{A table containing the frequencies of each class if input |
| 33 | +#' `sample.order` is defined as a categorical variable.} |
| 34 | +#' \item{sample.order}{A numeric vector representing the order of the data |
| 35 | +#' frame rows.} |
| 36 | +#' \item{legend.sub}{A character string containing the plot legend subtitle.} |
| 37 | +#' @author Stu Field |
| 38 | +#' @examples |
| 39 | +#' om <- calcOutlierMap(example_data) |
| 40 | +#' class(om) |
| 41 | +#' |
| 42 | +#' # S3 print method |
| 43 | +#' om |
| 44 | +#' |
| 45 | +#' # `sample.order = "frequency"` orders samples by outlier frequency |
| 46 | +#' om <- calcOutlierMap(example_data, sample.order = "frequency") |
| 47 | +#' om$rows.by.freq |
| 48 | +#' om$sample.order |
| 49 | +#' |
| 50 | +#' # order samples by user specified indices |
| 51 | +#' om <- calcOutlierMap(example_data, sample.order = 192:1) |
| 52 | +#' om$sample.order |
| 53 | +#' |
| 54 | +#' # order samples field in Adat |
| 55 | +#' om <- calcOutlierMap(example_data, sample.order = "Sex") |
| 56 | +#' om$sample.order |
| 57 | +#' @importFrom stats median |
| 58 | + |
| 59 | +#' @export |
| 60 | +calcOutlierMap <- function(data, anno_tbl = NULL, |
| 61 | + apt.order = c(NA, "dilution", "signal"), |
| 62 | + sample.order = NULL, fc.crit = 5) { |
| 63 | + |
| 64 | + apt.order <- match.arg(apt.order) |
| 65 | + data <- .refactorData(data) |
| 66 | + sampleL <- length(sample.order) |
| 67 | + freq <- sampleL == 1L && tolower(sample.order) %in% "frequency" |
| 68 | + class_tab <- NA |
| 69 | + ord <- seq_len(nrow(data)) |
| 70 | + ret <- list(matrix = matrix(0)) # placeholder: reserve position 1 |
| 71 | + |
| 72 | + if ( is.null(anno_tbl) ) { |
| 73 | + anno_tbl <- getAnalyteInfo(data) |
| 74 | + } |
| 75 | + |
| 76 | + # Order of the rows in the Map |
| 77 | + if ( !is.null(sample.order) && !freq ) { |
| 78 | + if ( sampleL > 1L && is.numeric(sample.order) ) { |
| 79 | + if ( sampleL != nrow(data) ) { |
| 80 | + stop( |
| 81 | + "Incorrect number of row indices: ", value(nrow(data)), |
| 82 | + "rows vs. ", value(sampleL), " indices.", call. = FALSE |
| 83 | + ) |
| 84 | + } else { |
| 85 | + data <- data[sample.order, ] |
| 86 | + ord <- sample.order |
| 87 | + ret$y.lab <- "Samples (User Specified Order)" |
| 88 | + } |
| 89 | + } else if ( sampleL == 1L && is.character(sample.order) ) { |
| 90 | + stopifnot(sample.order %in% names(data)) |
| 91 | + ord <- order(data[[sample.order]]) |
| 92 | + data <- data[ord, ] |
| 93 | + class_tab <- table(data[[sample.order]]) |
| 94 | + ret$y.lab <- sprintf("Samples (by %s)", sample.order) |
| 95 | + } else { |
| 96 | + stop( |
| 97 | + "Something wrong with `sample.order =` argument: ", |
| 98 | + value(sample.order), call. = FALSE |
| 99 | + ) |
| 100 | + } |
| 101 | + } |
| 102 | + |
| 103 | + data_mat <- data.matrix(data[, getAnalytes(data)]) |
| 104 | + |
| 105 | + # calc statistical outliers matrix (boolean matrix of TRUE/FALSE) |
| 106 | + mat <- apply(data_mat, 2, function(.apt) { |
| 107 | + seq_along(.apt) %in% .getOutliers(.apt, fc.crit) |
| 108 | + }) |
| 109 | + rownames(mat) <- rownames(data_mat) # rownames stripped by apply() |
| 110 | + |
| 111 | + if ( sum(mat) == 0 ) { |
| 112 | + warning("No outliers detected in outlier map!", call. = FALSE) |
| 113 | + } |
| 114 | + |
| 115 | + if ( freq ) { |
| 116 | + mat <- mat[ names(sort(rowSums(mat))), ] |
| 117 | + ret$y.lab <- "Samples Ordered by Outlier Frequency" |
| 118 | + } |
| 119 | + |
| 120 | + if ( is.na(apt.order) ) { |
| 121 | + |
| 122 | + ret$x.lab <- "Proteins Ordered in Adat" |
| 123 | + |
| 124 | + } else if ( apt.order == "dilution" ) { |
| 125 | + |
| 126 | + apt.dils <- .getDilList(anno_tbl) |
| 127 | + mat <- mat[, unlist(apt.dils)] |
| 128 | + ret$dil.nums <- lengths(apt.dils) |
| 129 | + ret$col.order <- "dilution" |
| 130 | + ret$x.lab <- sprintf("Dilution Ordered Proteins (%s)", |
| 131 | + paste(names(apt.dils), collapse = " | ")) |
| 132 | + |
| 133 | + } else if ( apt.order == "signal" ) { |
| 134 | + |
| 135 | + signal.order <- sort(apply(data_mat, 2, stats::median)) |
| 136 | + mat <- mat[, names(signal.order)] |
| 137 | + ret$col.order <- "signal" |
| 138 | + ret$x.lab <- "Proteins by Median Signal" |
| 139 | + |
| 140 | + } else { |
| 141 | + stop("Problem with `apt.order =` argument: ", |
| 142 | + value(apt.order), call. = FALSE) |
| 143 | + } |
| 144 | + |
| 145 | + ret$title <- paste0( |
| 146 | + "Outlier Map: | x - median(x) | > 6 * mad(x) & FC > ", fc.crit, "x" |
| 147 | + ) |
| 148 | + ret$rows.by.freq <- freq |
| 149 | + ret$class.tab <- class_tab |
| 150 | + ret$sample.order <- ord |
| 151 | + ret$matrix <- mat |
| 152 | + ret$legend.sub <- "Proteins" |
| 153 | + invisible( |
| 154 | + structure( |
| 155 | + ret, |
| 156 | + class = c("outlier_map", "Map", "list") |
| 157 | + ) |
| 158 | + ) |
| 159 | +} |
| 160 | + |
| 161 | + |
| 162 | +#' @describeIn calcOutlierMap |
| 163 | +#' There is a S3 print method for class `"outlier_map"`. |
| 164 | +#' @param x An object of class `"outlier_map"`. |
| 165 | +#' @param ... Arguments for S3 print methods. |
| 166 | +#' @export |
| 167 | +print.outlier_map <- function(x, ...) { |
| 168 | + writeLines( |
| 169 | + cli_rule("SomaLogic Outlier Map", line = "double", line_col = "magenta") |
| 170 | + ) |
| 171 | + key <- c( |
| 172 | + "Outlier Map dimensions", |
| 173 | + "Title", |
| 174 | + "Class Table", |
| 175 | + "Rows by Frequency", |
| 176 | + "Sample Order", |
| 177 | + "x-label", |
| 178 | + "Legend Sub-title") |> .pad(25) |
| 179 | + value <- c( |
| 180 | + .value(paste(dim(x$matrix), collapse = " x ")), |
| 181 | + .value(x$title), |
| 182 | + c(x$class.tab), |
| 183 | + x$rows.by.freq, |
| 184 | + .value(x$x.lab), |
| 185 | + .value(x$sample.order), |
| 186 | + .value(x$legend.sub) |
| 187 | + ) |
| 188 | + writeLines(paste(" ", key, value)) |
| 189 | + writeLines(cli_rule(line = "double", line_col = "green")) |
| 190 | + invisible(x) |
| 191 | +} |
| 192 | + |
| 193 | + |
| 194 | +#' S3 plot methods for class outlier_map |
| 195 | +#' @noRd |
| 196 | +#' @export |
| 197 | +plot.outlier_map <- function(x, ...) { |
| 198 | + NextMethod("plot", type = "outlier") |
| 199 | +} |
0 commit comments