|
| 1 | +# Top 20 and bottom 10%ile |
| 2 | + |
| 3 | +#' Create reports showing mean expression of top and bottom cells |
| 4 | +#' @param merge_file Path to a merged or consolidated cell seg data file |
| 5 | +#' @param config_file Path to a configuration file containing |
| 6 | +#' column names in the merge file, one name per line. |
| 7 | +#' @param by Column to aggregate by, e.g. "Slide ID" or "Annotation ID". |
| 8 | +#' @param tissue_categories Tissue categories to report, or NULL to use all. |
| 9 | +#' @param adjacent_max The maximum ratio between expression of adjacent fluors. |
| 10 | +#' @return None |
| 11 | +#' @export |
| 12 | +mean_of_top_and_bottom_cells_report = |
| 13 | + function(merge_file, config_file, by, tissue_categories, adjacent_max) { |
| 14 | + # Read the configuration. Each line should be a column name |
| 15 | + cols = readLines(config_file) %>% |
| 16 | + purrr::map_chr(stringr::str_trim) %>% |
| 17 | + purrr::discard(~.x=="") |
| 18 | + |
| 19 | + # Create the Excel file |
| 20 | + excel_path = mean_of_top_and_bottom_cells( |
| 21 | + merge_file, cols, adjacent_max=adjacent_max, |
| 22 | + tissue_categories=tissue_categories, |
| 23 | + .by=by) |
| 24 | + |
| 25 | + # Create the charts |
| 26 | + charts_path = stringr::str_replace(excel_path, 'xlsx$', 'docx') |
| 27 | + write_mean_of_top_and_bottom_charts(excel_path, charts_path, .by=by) |
| 28 | + |
| 29 | + cat('Reports written to\n', excel_path, '\n', charts_path, '\n') |
| 30 | +} |
| 31 | + |
| 32 | +#' Compute mean expression levels of the top and bottom expressing cells, |
| 33 | +#' per Slide ID or field. This creates a quality report. |
| 34 | +#' |
| 35 | +#' Report the results in an Excel workbook in the same directory |
| 36 | +#' as `csd_path`. |
| 37 | +#' @param csd_path Path to a merged cell seg data, or NULL. If NULL, a file |
| 38 | +#' chooser is opened to allow file selection. |
| 39 | +#' @param expression_cols Vector of column names to report. |
| 40 | +#' @param top_count The number of top-expressing cells to select for |
| 41 | +#' averaging. |
| 42 | +#' @param bottom_percentile The cutoff for the bottom percentile cells. |
| 43 | +#' @param adjacent_max The maximum ratio between expression of adjacent fluors |
| 44 | +#' @param tissue_categories Tissue categories to include, or NULL for all |
| 45 | +#' @param .by The column to aggregate by |
| 46 | +#' @param out_path The path to the output file; if `NULL`, a date-stamped file |
| 47 | +#' will be written to the same directory as `csd_path`. |
| 48 | +#' @return The path to the generated file. |
| 49 | +#' @importFrom magrittr %>% |
| 50 | +#' @importFrom rlang .data |
| 51 | +#' @export |
| 52 | +mean_of_top_and_bottom_cells = |
| 53 | + function(csd_path=NULL, expression_cols, |
| 54 | + top_count=20, bottom_percentile=0.1, adjacent_max = 3, |
| 55 | + tissue_categories=NULL, .by='Slide ID', out_path=NULL) |
| 56 | +{ |
| 57 | + if (is.null(csd_path)) |
| 58 | + csd_path = file.choose() |
| 59 | + |
| 60 | + # Read the merged cell seg data file |
| 61 | + csd = phenoptr::read_cell_seg_data(csd_path) |
| 62 | + |
| 63 | + if (!is.null(tissue_categories) && length(tissue_categories) > 0) |
| 64 | + csd = csd %>% dplyr::filter(.data$`Tissue Category` %in% tissue_categories) |
| 65 | + |
| 66 | + # Define phenotypes and expression |
| 67 | + phenotypes = phenoptr::parse_phenotypes('Total Cells') |
| 68 | + |
| 69 | + # Check that all the expression columns exist |
| 70 | + missing = !(expression_cols %in% names(csd)) |
| 71 | + if (any(missing)) { |
| 72 | + missing = expression_cols[missing] |
| 73 | + stop('Columns missing from cell seg data: ', paste(missing, collapse=', ')) |
| 74 | + } |
| 75 | + |
| 76 | + |
| 77 | + params = rlang::set_names(expression_cols, |
| 78 | + rep('Total Cells', length(expression_cols))) %>% |
| 79 | + as.list() |
| 80 | + |
| 81 | + # Compute top and bottom means |
| 82 | + top20 = phenoptrReports::compute_mean_expression_many( |
| 83 | + csd, phenotypes, params, count=top_count, .by=.by) %>% |
| 84 | + dplyr::select(!!.by, dplyr::everything()) |
| 85 | + bottom10 = phenoptrReports::compute_mean_expression_many( |
| 86 | + csd, phenotypes, params, percentile=-bottom_percentile, .by=.by) %>% |
| 87 | + dplyr::select(!!.by, dplyr::everything()) |
| 88 | + |
| 89 | + # Clean up column names, remove everything before the compartment and ' Mean' |
| 90 | + clean_names = function(n) { |
| 91 | + n %>% stringr::str_remove('^.*(?=Nucleus|Cytoplasm|Membrane)') %>% |
| 92 | + stringr::str_remove(' Mean') |
| 93 | + } |
| 94 | + |
| 95 | + names(top20) = clean_names(names(top20)) |
| 96 | + names(bottom10) = clean_names(names(bottom10)) |
| 97 | + |
| 98 | + # Ratio of means, top / bottom |
| 99 | + ratio_top_to_bottom = purrr::map2(top20, bottom10, function(t, b) { |
| 100 | + if (!is.numeric(t)) return(t) |
| 101 | + t / b |
| 102 | + }) %>% tibble::as_tibble() |
| 103 | + |
| 104 | + # Ratio of means of top cells, adjacent fluors |
| 105 | + ratio_data = top20 %>% |
| 106 | + dplyr::select(-!!.by) |
| 107 | + first_fluors = ratio_data %>% dplyr::select(-ncol(ratio_data)) |
| 108 | + second_fluors = ratio_data %>% dplyr::select(-1) |
| 109 | + ratio_adjacent = purrr::map2(first_fluors, second_fluors, ~.x/.y) %>% |
| 110 | + purrr::set_names(purrr::map2(names(first_fluors), names(second_fluors), |
| 111 | + ~paste0(.x, ' / ', .y))) %>% |
| 112 | + tibble::as_tibble() %>% |
| 113 | + tibble::add_column(top20[.by], .before=1) |
| 114 | + |
| 115 | + # Write it out |
| 116 | + red_style = openxlsx::createStyle(fontColour='red') |
| 117 | + |
| 118 | + # How many non-data columns are there? |
| 119 | + # The main header starts in the next column |
| 120 | + header_col = 2 |
| 121 | + |
| 122 | + if (is.null(out_path)) |
| 123 | + out_path = file.path(dirname(csd_path), |
| 124 | + stringr::str_glue( |
| 125 | + "{format(Sys.Date(), '%Y%m%d')}_Top{top_count}_Bottom{bottom_percentile*100}_Data.xlsx")) |
| 126 | + wb = openxlsx::createWorkbook() |
| 127 | + |
| 128 | + write_and_style_sheet(wb, top20, |
| 129 | + stringr::str_glue('Top {top_count} data'), |
| 130 | + stringr::str_glue('Mean expression of top {top_count} cells'), |
| 131 | + header_col) |
| 132 | + |
| 133 | + write_and_style_sheet(wb, bottom10, |
| 134 | + stringr::str_glue('Bottom {bottom_percentile*100}%ile data'), |
| 135 | + stringr::str_glue( |
| 136 | + 'Mean expression of bottom {bottom_percentile*100}%ile cells'), |
| 137 | + header_col) |
| 138 | + |
| 139 | + sheet_name = 'Ratio top to bottom' |
| 140 | + write_and_style_sheet(wb, ratio_top_to_bottom, |
| 141 | + sheet_name, |
| 142 | + stringr::str_glue( |
| 143 | + 'Ratio of means, top {top_count} cells / bottom {bottom_percentile*100}%ile cells'), |
| 144 | + header_col) |
| 145 | + |
| 146 | + # Mark out-of-range rows for Opal 780 ratio if present |
| 147 | + col_780 = stringr::str_subset(names(ratio_top_to_bottom), '780') |
| 148 | + if (length(col_780)==1) { |
| 149 | + top_to_bottom_min = 30 # Opal 780 ratio should be greater than this |
| 150 | + |
| 151 | + red_rows = which(ratio_top_to_bottom[[col_780]] < top_to_bottom_min) |
| 152 | + if (length(red_rows) > 0) { |
| 153 | + red_rows = red_rows + 2 # skip header rows |
| 154 | + red_cols = which(names(ratio_top_to_bottom) == col_780) |
| 155 | + openxlsx::addStyle(wb, sheet_name, red_style, |
| 156 | + rows=red_rows, cols=red_cols, |
| 157 | + gridExpand=TRUE, stack=TRUE) |
| 158 | + } |
| 159 | + } |
| 160 | + |
| 161 | + sheet_name = 'Ratio adjacent fluors' |
| 162 | + write_and_style_sheet(wb, ratio_adjacent, |
| 163 | + sheet_name, |
| 164 | + stringr::str_glue( |
| 165 | + 'Ratio of means, top {top_count} cells of adjacent fluors'), |
| 166 | + header_col) |
| 167 | + |
| 168 | + red_cells = (ratio_adjacent[-1] > adjacent_max |
| 169 | + | ratio_adjacent[-1] < 1/adjacent_max) |
| 170 | + red_cells = which(as.matrix(red_cells), arr.ind=TRUE) |
| 171 | + if (dim(red_cells)[1] > 0) { |
| 172 | + openxlsx::addStyle(wb, sheet_name, red_style, |
| 173 | + rows=red_cells[,'row']+2, cols=red_cells[,'col']+1, |
| 174 | + gridExpand=FALSE, stack=TRUE) |
| 175 | + } |
| 176 | + |
| 177 | + openxlsx::saveWorkbook(wb, out_path, overwrite = TRUE) |
| 178 | + |
| 179 | + return(out_path) |
| 180 | +} |
| 181 | + |
| 182 | +write_and_style_sheet = |
| 183 | + function(wb, d, sheet_name, sheet_title, header_col) { |
| 184 | + phenoptrReports::write_sheet(wb, d, sheet_name, sheet_title, header_col) |
| 185 | + data_cols = header_col:ncol(d) |
| 186 | + openxlsx::setColWidths(wb, sheet_name, data_cols, 14) |
| 187 | + |
| 188 | + number_style=openxlsx::createStyle(numFmt='0.0000') |
| 189 | + openxlsx::addStyle(wb, sheet_name, number_style, |
| 190 | + rows=seq_len(nrow(d))+2, cols=data_cols, |
| 191 | + gridExpand=TRUE, stack=TRUE) |
| 192 | +} |
| 193 | + |
| 194 | +#' Create summary charts from the results of `mean_of_top_and_bottom_cells` |
| 195 | +#' |
| 196 | +#' Create a Microsoft Word file or HTML document containing summary charts |
| 197 | +#' derived from the output of [mean_of_top_and_bottom_cells] |
| 198 | +#' with default parameters. |
| 199 | +#' The file type is determined by the file extension |
| 200 | +#' of `output_path`, which must be either `.docx` or `.html`. |
| 201 | +#' @param worksheet_path Path to an Excel file containing sheets written |
| 202 | +#' by [mean_of_top_and_bottom_cells]. |
| 203 | +#' @param output_path Path to write the resulting file. |
| 204 | +#' @param .by The aggregation column name |
| 205 | +#' @export |
| 206 | +write_mean_of_top_and_bottom_charts = |
| 207 | + function(worksheet_path, output_path, .by='Slide ID') { |
| 208 | + stopifnot(file.exists(worksheet_path)) |
| 209 | + |
| 210 | + if (is.null(output_path)) |
| 211 | + stop('You must provide an output path.') |
| 212 | + |
| 213 | + output_format = switch(tools::file_ext(output_path), |
| 214 | + docx='word_document', |
| 215 | + html='html_vignette') |
| 216 | + |
| 217 | + if (is.null(output_format)) |
| 218 | + stop('Unsupported output format') |
| 219 | + |
| 220 | + rmd_path = system.file("rmd", "Mean_of_top_and_bottom_charts.Rmd", |
| 221 | + package="phenoptrReports") |
| 222 | + |
| 223 | + rmarkdown::render(rmd_path, output_file=output_path, quiet=TRUE, |
| 224 | + output_format=output_format, |
| 225 | + params=list(data_path=worksheet_path, .by=.by)) |
| 226 | +} |
0 commit comments