Skip to content

Commit 3d8c12b

Browse files
committed
Merge branch 'assay_dev' into 9000
2 parents 97e32b9 + 939810d commit 3d8c12b

File tree

77 files changed

+40333
-5
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

77 files changed

+40333
-5
lines changed

DESCRIPTION

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ Authors@R: c(
66
person("Kent S", "Johnson", role = c("aut", "cre"),
77
email = "[email protected]"),
88
person("Carla", "Coltharp", role="ctb"),
9-
person("Akoya Biosciences", role="cph"))
9+
person("Akoya Biosciences", role=c("cph", "fnd")))
1010
Copyright: Akoya Biosciences
1111
Description: Creates diagnostic and summary reports from inForm data.
1212
Language: en-US
@@ -21,6 +21,7 @@ Imports:
2121
forcats,
2222
fs,
2323
ggplot2 (>= 3.3.2),
24+
glue,
2425
jpeg,
2526
knitr,
2627
magrittr,

NAMESPACE

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ export(addin_30_analysis_app)
66
export(addin_35_spatial_map_viewer)
77
export(addin_40_unmixing_quality)
88
export(addin_50_component_levels)
9+
export(addin_60_mean_of_top_and_bottom_cells)
10+
export(addin_70_staining_consistency_report)
911
export(choose_directory)
1012
export(choose_files)
1113
export(component_levels_report)
@@ -22,19 +24,23 @@ export(count_phenotypes)
2224
export(count_within_summary)
2325
export(counts_to_percents)
2426
export(discrete_colors)
27+
export(mean_of_top_and_bottom_cells)
28+
export(mean_of_top_and_bottom_cells_report)
2529
export(merge_cell_seg_files)
2630
export(nearest_neighbor_map)
2731
export(nearest_neighbor_summary)
2832
export(order_by_slide_and_tissue_category)
2933
export(spatial_map_viewer)
3034
export(split_phenotypes)
35+
export(staining_consistency_report)
3136
export(unmixing_quality_report)
3237
export(upset_plot)
3338
export(write_count_within_sheet)
3439
export(write_counts_sheet)
3540
export(write_density_sheet)
3641
export(write_expression_sheet)
3742
export(write_h_score_sheet)
43+
export(write_mean_of_top_and_bottom_charts)
3844
export(write_nearest_neighbor_summary_sheet)
3945
export(write_percents_sheet)
4046
export(write_plot_sheet)

NEWS.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,15 @@
11
# phenoptrReports 0.2.13.9000
22

3+
Staining consistency report:
4+
- New addin creates a report showing mean expression of a selected marker
5+
across multiple samples.
6+
7+
Mean of top 20 / bottom 10 addin:
8+
- New addin helps to evaluate whether the staining quality is
9+
likely to produce good unmixing. See
10+
https://akoyabio.github.io/phenoptrReports/articles/top_20_bottom_10_report.html
11+
for details.
12+
313
Analysis addin:
414
- Fix problem computing mean expression for phenotypes with formulas
515
in their definition #47

R/mean_of_top_and_bottom_cells.R

Lines changed: 226 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,226 @@
1+
# Top 20 and bottom 10%ile
2+
3+
#' Create reports showing mean expression of top and bottom cells
4+
#' @param merge_file Path to a merged or consolidated cell seg data file
5+
#' @param config_file Path to a configuration file containing
6+
#' column names in the merge file, one name per line.
7+
#' @param by Column to aggregate by, e.g. "Slide ID" or "Annotation ID".
8+
#' @param tissue_categories Tissue categories to report, or NULL to use all.
9+
#' @param adjacent_max The maximum ratio between expression of adjacent fluors.
10+
#' @return None
11+
#' @export
12+
mean_of_top_and_bottom_cells_report =
13+
function(merge_file, config_file, by, tissue_categories, adjacent_max) {
14+
# Read the configuration. Each line should be a column name
15+
cols = readLines(config_file) %>%
16+
purrr::map_chr(stringr::str_trim) %>%
17+
purrr::discard(~.x=="")
18+
19+
# Create the Excel file
20+
excel_path = mean_of_top_and_bottom_cells(
21+
merge_file, cols, adjacent_max=adjacent_max,
22+
tissue_categories=tissue_categories,
23+
.by=by)
24+
25+
# Create the charts
26+
charts_path = stringr::str_replace(excel_path, 'xlsx$', 'docx')
27+
write_mean_of_top_and_bottom_charts(excel_path, charts_path, .by=by)
28+
29+
cat('Reports written to\n', excel_path, '\n', charts_path, '\n')
30+
}
31+
32+
#' Compute mean expression levels of the top and bottom expressing cells,
33+
#' per Slide ID or field. This creates a quality report.
34+
#'
35+
#' Report the results in an Excel workbook in the same directory
36+
#' as `csd_path`.
37+
#' @param csd_path Path to a merged cell seg data, or NULL. If NULL, a file
38+
#' chooser is opened to allow file selection.
39+
#' @param expression_cols Vector of column names to report.
40+
#' @param top_count The number of top-expressing cells to select for
41+
#' averaging.
42+
#' @param bottom_percentile The cutoff for the bottom percentile cells.
43+
#' @param adjacent_max The maximum ratio between expression of adjacent fluors
44+
#' @param tissue_categories Tissue categories to include, or NULL for all
45+
#' @param .by The column to aggregate by
46+
#' @param out_path The path to the output file; if `NULL`, a date-stamped file
47+
#' will be written to the same directory as `csd_path`.
48+
#' @return The path to the generated file.
49+
#' @importFrom magrittr %>%
50+
#' @importFrom rlang .data
51+
#' @export
52+
mean_of_top_and_bottom_cells =
53+
function(csd_path=NULL, expression_cols,
54+
top_count=20, bottom_percentile=0.1, adjacent_max = 3,
55+
tissue_categories=NULL, .by='Slide ID', out_path=NULL)
56+
{
57+
if (is.null(csd_path))
58+
csd_path = file.choose()
59+
60+
# Read the merged cell seg data file
61+
csd = phenoptr::read_cell_seg_data(csd_path)
62+
63+
if (!is.null(tissue_categories) && length(tissue_categories) > 0)
64+
csd = csd %>% dplyr::filter(.data$`Tissue Category` %in% tissue_categories)
65+
66+
# Define phenotypes and expression
67+
phenotypes = phenoptr::parse_phenotypes('Total Cells')
68+
69+
# Check that all the expression columns exist
70+
missing = !(expression_cols %in% names(csd))
71+
if (any(missing)) {
72+
missing = expression_cols[missing]
73+
stop('Columns missing from cell seg data: ', paste(missing, collapse=', '))
74+
}
75+
76+
77+
params = rlang::set_names(expression_cols,
78+
rep('Total Cells', length(expression_cols))) %>%
79+
as.list()
80+
81+
# Compute top and bottom means
82+
top20 = phenoptrReports::compute_mean_expression_many(
83+
csd, phenotypes, params, count=top_count, .by=.by) %>%
84+
dplyr::select(!!.by, dplyr::everything())
85+
bottom10 = phenoptrReports::compute_mean_expression_many(
86+
csd, phenotypes, params, percentile=-bottom_percentile, .by=.by) %>%
87+
dplyr::select(!!.by, dplyr::everything())
88+
89+
# Clean up column names, remove everything before the compartment and ' Mean'
90+
clean_names = function(n) {
91+
n %>% stringr::str_remove('^.*(?=Nucleus|Cytoplasm|Membrane)') %>%
92+
stringr::str_remove(' Mean')
93+
}
94+
95+
names(top20) = clean_names(names(top20))
96+
names(bottom10) = clean_names(names(bottom10))
97+
98+
# Ratio of means, top / bottom
99+
ratio_top_to_bottom = purrr::map2(top20, bottom10, function(t, b) {
100+
if (!is.numeric(t)) return(t)
101+
t / b
102+
}) %>% tibble::as_tibble()
103+
104+
# Ratio of means of top cells, adjacent fluors
105+
ratio_data = top20 %>%
106+
dplyr::select(-!!.by)
107+
first_fluors = ratio_data %>% dplyr::select(-ncol(ratio_data))
108+
second_fluors = ratio_data %>% dplyr::select(-1)
109+
ratio_adjacent = purrr::map2(first_fluors, second_fluors, ~.x/.y) %>%
110+
purrr::set_names(purrr::map2(names(first_fluors), names(second_fluors),
111+
~paste0(.x, ' / ', .y))) %>%
112+
tibble::as_tibble() %>%
113+
tibble::add_column(top20[.by], .before=1)
114+
115+
# Write it out
116+
red_style = openxlsx::createStyle(fontColour='red')
117+
118+
# How many non-data columns are there?
119+
# The main header starts in the next column
120+
header_col = 2
121+
122+
if (is.null(out_path))
123+
out_path = file.path(dirname(csd_path),
124+
stringr::str_glue(
125+
"{format(Sys.Date(), '%Y%m%d')}_Top{top_count}_Bottom{bottom_percentile*100}_Data.xlsx"))
126+
wb = openxlsx::createWorkbook()
127+
128+
write_and_style_sheet(wb, top20,
129+
stringr::str_glue('Top {top_count} data'),
130+
stringr::str_glue('Mean expression of top {top_count} cells'),
131+
header_col)
132+
133+
write_and_style_sheet(wb, bottom10,
134+
stringr::str_glue('Bottom {bottom_percentile*100}%ile data'),
135+
stringr::str_glue(
136+
'Mean expression of bottom {bottom_percentile*100}%ile cells'),
137+
header_col)
138+
139+
sheet_name = 'Ratio top to bottom'
140+
write_and_style_sheet(wb, ratio_top_to_bottom,
141+
sheet_name,
142+
stringr::str_glue(
143+
'Ratio of means, top {top_count} cells / bottom {bottom_percentile*100}%ile cells'),
144+
header_col)
145+
146+
# Mark out-of-range rows for Opal 780 ratio if present
147+
col_780 = stringr::str_subset(names(ratio_top_to_bottom), '780')
148+
if (length(col_780)==1) {
149+
top_to_bottom_min = 30 # Opal 780 ratio should be greater than this
150+
151+
red_rows = which(ratio_top_to_bottom[[col_780]] < top_to_bottom_min)
152+
if (length(red_rows) > 0) {
153+
red_rows = red_rows + 2 # skip header rows
154+
red_cols = which(names(ratio_top_to_bottom) == col_780)
155+
openxlsx::addStyle(wb, sheet_name, red_style,
156+
rows=red_rows, cols=red_cols,
157+
gridExpand=TRUE, stack=TRUE)
158+
}
159+
}
160+
161+
sheet_name = 'Ratio adjacent fluors'
162+
write_and_style_sheet(wb, ratio_adjacent,
163+
sheet_name,
164+
stringr::str_glue(
165+
'Ratio of means, top {top_count} cells of adjacent fluors'),
166+
header_col)
167+
168+
red_cells = (ratio_adjacent[-1] > adjacent_max
169+
| ratio_adjacent[-1] < 1/adjacent_max)
170+
red_cells = which(as.matrix(red_cells), arr.ind=TRUE)
171+
if (dim(red_cells)[1] > 0) {
172+
openxlsx::addStyle(wb, sheet_name, red_style,
173+
rows=red_cells[,'row']+2, cols=red_cells[,'col']+1,
174+
gridExpand=FALSE, stack=TRUE)
175+
}
176+
177+
openxlsx::saveWorkbook(wb, out_path, overwrite = TRUE)
178+
179+
return(out_path)
180+
}
181+
182+
write_and_style_sheet =
183+
function(wb, d, sheet_name, sheet_title, header_col) {
184+
phenoptrReports::write_sheet(wb, d, sheet_name, sheet_title, header_col)
185+
data_cols = header_col:ncol(d)
186+
openxlsx::setColWidths(wb, sheet_name, data_cols, 14)
187+
188+
number_style=openxlsx::createStyle(numFmt='0.0000')
189+
openxlsx::addStyle(wb, sheet_name, number_style,
190+
rows=seq_len(nrow(d))+2, cols=data_cols,
191+
gridExpand=TRUE, stack=TRUE)
192+
}
193+
194+
#' Create summary charts from the results of `mean_of_top_and_bottom_cells`
195+
#'
196+
#' Create a Microsoft Word file or HTML document containing summary charts
197+
#' derived from the output of [mean_of_top_and_bottom_cells]
198+
#' with default parameters.
199+
#' The file type is determined by the file extension
200+
#' of `output_path`, which must be either `.docx` or `.html`.
201+
#' @param worksheet_path Path to an Excel file containing sheets written
202+
#' by [mean_of_top_and_bottom_cells].
203+
#' @param output_path Path to write the resulting file.
204+
#' @param .by The aggregation column name
205+
#' @export
206+
write_mean_of_top_and_bottom_charts =
207+
function(worksheet_path, output_path, .by='Slide ID') {
208+
stopifnot(file.exists(worksheet_path))
209+
210+
if (is.null(output_path))
211+
stop('You must provide an output path.')
212+
213+
output_format = switch(tools::file_ext(output_path),
214+
docx='word_document',
215+
html='html_vignette')
216+
217+
if (is.null(output_format))
218+
stop('Unsupported output format')
219+
220+
rmd_path = system.file("rmd", "Mean_of_top_and_bottom_charts.Rmd",
221+
package="phenoptrReports")
222+
223+
rmarkdown::render(rmd_path, output_file=output_path, quiet=TRUE,
224+
output_format=output_format,
225+
params=list(data_path=worksheet_path, .by=.by))
226+
}

0 commit comments

Comments
 (0)