-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpowerprior.R
145 lines (127 loc) · 5.68 KB
/
powerprior.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# Processing power prior -------------------------------------------------------------
#' Extract power prior from historical posterior summary statistics and put in nice format for univariate models
#'
#' @param par_historical Dataframe of summary statistics of population parameters (for only one model)
#'
#' @return Named list (prefixed by "historical_") of parameters in par_historical:
#' - the first element/column correspond to the mean
#' - the second element/column corresponds to the standard deviation.
extract_powerprior_uni <- function(par_historical) {
# Can be useful to integrate to EczemaPred
stopifnot(is.data.frame(par_historical),
all(c("Variable", "Mean", "sd") %in% colnames(par_historical)))
par_names <- unique(par_historical[["Variable"]])
power_prior <- lapply(par_names,
function(x) {
tmp <- par_historical %>%
filter(Variable %in% x) %>%
arrange(Index)
out <- cbind(tmp[["Mean"]], tmp[["sd"]])
if (nrow(tmp) == 1) {
out <- c(out)
}
return(out)
})
names(power_prior) <- paste0(par_names)
stopifnot(all(do.call(rbind, power_prior)[, 2] >= 0))
return(power_prior)
}
#' Extract and process power prior for multivariate models
#'
#' @param par_historical Dataframe containing posterior summary statistics for all items.
#' It should notably associate each item to a unique Item ID and Distribution ID.
#'
#' @return List containing power prior to add to Stan data input
process_powerprior_FullModel <- function(par_historical) {
stopifnot(is.data.frame(par_historical),
all(c("Distribution", "ItemID") %in% colnames(par_historical)))
# Other columns are checked in extract_powerprior_uni
comp1 <- par_historical %>% filter(Distribution == 1) %>% distinct(ItemID) %>% pull()
D1 <- length(comp1)
comp2 <- par_historical %>% filter(Distribution == 2) %>% distinct(ItemID) %>% pull()
D2 <- length(comp2)
D <- D1 + D2
# Extract power prior for each item
lpp <- lapply(1:D,
function(d) {
par_historical %>%
filter(ItemID == d) %>%
extract_powerprior_uni()
})
# Everything but delta
power_prior <- lapply(lpp,
function(x) {
x[["delta"]] <- NULL
return(x)
}) %>%
merge_lists(along = 0)
# Deal with delta
tmp <- list(lpp[comp1], lpp[comp2])
pp_delta <- lapply(1:length(tmp),
function(i) {
lapply(tmp[[i]],
function(x) {
out <- x[c("delta")]
names(out) <- paste0(names(out), i)
return(out)
}) %>%
merge_lists(along = 0)
}) %>%
unlist(recursive = FALSE)
power_prior <- c(pp_delta, power_prior)
return(power_prior)
}
# Power prior influence ---------------------------------------------------
#' Associate forgetting factor `a0` to the relative importance `rho` of new dataset in the final posterior
#'
#' @param rho Relative importance of new dataset in the final posterior
#' @param n_new Number of observation in the new dataset.
#' Default to number of observations in PFDC dataset.
#' @param n_historical Number of observations in the historical dataset.
#' Default to the number of observations in the Derexyl dataset (=9408).
#' This value can be obtained with `nrow(load_dataset("Derexyl"))`
#' in [EczemaPredPOSCORAD](https://github.com/ghurault/EczemaPredPOSCORAD).
#'
#' @return Vector
rho_to_a0 <- function(rho,
n_new = nrow(load_PFDC()$POSCORAD),
n_historical = 9408) {
n_new / n_historical * (1 - rho) / rho
}
#' Associate the relative importance `rho` of new dataset in the final posterior to the forgetting factor `a0`
#'
#' @param a0 Forgetting factor
#' @param n_new Number of observation in the new dataset.
#' Default to number of observations in PFDC dataset.
#' @param n_historical Number of observations in the historical dataset.
#' Default to the number of observations in the Derexyl dataset (=9408).
#' This value can be obtained with `nrow(load_dataset("Derexyl"))`
#' in [EczemaPredPOSCORAD](https://github.com/ghurault/EczemaPredPOSCORAD).
#'
#' @return Vector
a0_to_rho <- function(a0,
n_new = nrow(load_PFDC()$POSCORAD),
n_historical = 9408) {
n_new / (n_new + a0 * n_historical)
}
#' Plot influence of forgetting factor when using a power prior
#'
#' Plot the relative importance `rho` of new dataset in the final posterior as a function of the forgetting factor `a0`.
#'
#' @param n_new Number of observation in the new dataset.
#' Default to number of observations in PFDC dataset.
#' @param n_historical Number of observations in the historical dataset.
#' Default to the number of observations in the Derexyl dataset (=9408).
#' This value can be obtained with `nrow(load_dataset("Derexyl"))`
#' in [EczemaPredPOSCORAD](https://github.com/ghurault/EczemaPredPOSCORAD).
#'
#' @return Ggplot
plot_powerprior_influence <- function(n_new = nrow(load_PFDC()$POSCORAD),
n_historical = 9408) {
tibble(a0 = seq(0, 1, .01)) %>%
mutate(rho = a0_to_rho(a0)) %>%
ggplot(aes(x = a0, y = rho)) +
geom_line() +
coord_cartesian(xlim = c(0,1 ), ylim = c(0, 1), expand = FALSE) +
theme_bw(base_size = 15)
}