diff --git a/.Rbuildignore b/.Rbuildignore index c4f1ad1..63c7030 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -15,3 +15,4 @@ ^readme-cache/ ^vignettes/ ^wercker\.yml$ +^_pkgdown\.yml$ diff --git a/.travis.yml b/.travis.yml index cccf3d9..7c0ba73 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,16 +3,6 @@ cache: packages matrix: include: - - os: linux - dist: precise - r: release - - os: linux - dist: precise - r: devel - - os: linux - dist: precise - r: oldrel - - os: linux dist: trusty r: release @@ -30,10 +20,6 @@ matrix: osx_image: xcode8.3 latex: false r: release - # - os: osx - # osx_image: xcode8.3 - # latex: false - # r: devel - os: osx osx_image: xcode8.3 latex: false @@ -43,9 +29,6 @@ matrix: - os: osx latex: false r: release - # - os: osx - # latex: false - # r: devel - os: osx latex: false r: oldrel diff --git a/DESCRIPTION b/DESCRIPTION index 0bfb257..d8342f9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: dgo Title: Dynamic Estimation of Group-Level Opinion -Version: 0.2.10 -Date: 2017-05-29 +Version: 0.2.11 +Date: 2017-10-26 Description: Fit dynamic group-level IRT and MRP models from individual or aggregated item response data. This package handles common preprocessing tasks and extends functions for inspecting results, poststratification, and @@ -43,7 +43,6 @@ Collate: 'class-dgmrp_fit.r' 'dgirt.r' 'dichotomize_item_responses.r' - 'expand_rownames.r' 'methods-control.r' 'methods-dgirtfit-plot.r' 'methods-dgirtfit-poststratify.r' diff --git a/Makefile b/Makefile index 096f357..752c33f 100644 --- a/Makefile +++ b/Makefile @@ -25,18 +25,17 @@ build-cran: $(R) CMD build . --no-resave-data --no-manual check: - $(R) CMD check $(PKG)_$(VERSION).tar.gz + $(R) CMD check $(BINARY) check-cran: - $(R) CMD check --as-cran $(PKG)_$(VERSION).tar.gz + $(R) CMD check --as-cran $(BINARY) -check-quick $(PKG)_$(VERSION).tar.gz: +check-quick $(BINARY): $(R) $(R_ARGS) CMD build . - $(R) CMD check $(PKG)_$(VERSION).tar.gz + $(R) CMD check $(BINARY) -install: $(PKG)_$(VERSION).tar.gz - $(R) CMD INSTALL --no-multiarch --with-keep.source \ - $(PKG)_$(VERSION).tar.gz +install: $(BINARY) + $(R) CMD INSTALL --no-multiarch --with-keep.source $(BINARY) install-code: $(R) CMD INSTALL --no-multiarch --with-keep.source --no-docs . diff --git a/NAMESPACE b/NAMESPACE index 1ee7f46..d102e26 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,7 +3,6 @@ S3method(as.data.frame,dgo_fit) export(dgirt) export(dgmrp) -export(expand_rownames) export(plot_rhats) export(shape) export(summarize) diff --git a/NEWS.md b/NEWS.md index 17f2fa2..3923f0b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,15 @@ +## dgo 0.2.11 + +* Add poststratification over posterior samples (closes #21). +* `shape()` now accepts aggregated item response data unaccompanied by + individual-level item response data. The `item_data` and `item_names` + arguments are no longer required. +* Add a `max_raked_weight` argument to `shape()` for trimming raked weights. + Note that trimming occurs before raked weights are rescaled to have mean 1, + and the rescaled weights can be larger than `max_raked_weight`. +* Remove the unused function `expand_rownames()`. +* Bugfixes. + ## dgo 0.2.10 * Remove Rcpp dependency by rewriting `dichotomize()` in R. diff --git a/R/aggregate_item_responses.r b/R/aggregate_item_responses.r index 90bb4fa..9b5cc9e 100644 --- a/R/aggregate_item_responses.r +++ b/R/aggregate_item_responses.r @@ -23,78 +23,90 @@ make_group_counts <- function(item_data, aggregate_data, ctrl) { # Because of how DGIRT Stan code iterates over the data, the result must be # ordered by time, item, and then group. The order of the grouping variables # doesn't matter. - gt_names <- attr(item_data, "gt_items") - item_data[, c("n_responses") := list(rowSums(!is.na(.SD))), - .SDcols = gt_names] - if (!length(ctrl@weight_name)) { - item_data[, weight := 1L] - ctrl@weight_name <- "weight" - } - item_data[, c("def") := lapply(.SD, calc_design_effects), - .SDcols = ctrl@weight_name, - by = c(ctrl@geo_name, ctrl@group_names, ctrl@time_name)] - - # get design-effect-adjusted nonmissing response counts by group and item - item_n <- item_data[, lapply(.SD, count_items, get("n_responses"), get("def")), - .SDcols = c(gt_names), - by = c(ctrl@geo_name, ctrl@group_names, ctrl@time_name)] - # append _n_grp to the response count columns - item_n_vars <- paste0(gt_names, "_n_grp") - names(item_n) <- replace(names(item_n), match(gt_names, names(item_n)), item_n_vars) - data.table::setkeyv(item_n, c(ctrl@time_name, ctrl@geo_name, ctrl@group_names)) - drop_cols <- setdiff(names(item_n), c(key(item_n), item_n_vars)) - if (length(drop_cols)) { - item_n[, c(drop_cols) := NULL] - } + if (length(item_data)) { + gt_names <- attr(item_data, "gt_items") + item_data[, c("n_responses") := list(rowSums(!is.na(.SD))), + .SDcols = gt_names] + if (!length(ctrl@weight_name)) { + item_data[, weight := 1L] + ctrl@weight_name <- "weight" + } + item_data[, c("def") := lapply(.SD, calc_design_effects), + .SDcols = ctrl@weight_name, + by = c(ctrl@geo_name, ctrl@group_names, ctrl@time_name)] - # get mean ystar - item_data[, c("adj_weight") := get(ctrl@weight_name) / get("n_responses")] - item_means <- item_data[, lapply(.SD, function(x) weighted.mean(x, .SD$adj_weight, na.rm = TRUE)), - .SDcols = c(gt_names, "adj_weight"), - by = c(ctrl@geo_name, ctrl@group_names, ctrl@time_name)] - # append _mean to the mean response columns - item_mean_vars <- paste0(gt_names, "_mean") - names(item_means) <- replace(names(item_means), match(gt_names, names(item_means)), item_mean_vars) - data.table::setkeyv(item_means, c(ctrl@time_name, ctrl@geo_name, ctrl@group_names)) - drop_cols <- setdiff(names(item_means), c(key(item_means), item_mean_vars)) - if (length(drop_cols)) { - item_means[, c(drop_cols) := NULL] - } + # get design-effect-adjusted nonmissing response counts by group and item + item_n <- item_data[, lapply(.SD, count_items, get("n_responses"), get("def")), + .SDcols = c(gt_names), + by = c(ctrl@geo_name, ctrl@group_names, ctrl@time_name)] + # append _n_grp to the response count columns + item_n_vars <- paste0(gt_names, "_n_grp") + names(item_n) <- replace(names(item_n), match(gt_names, names(item_n)), item_n_vars) + data.table::setkeyv(item_n, c(ctrl@time_name, ctrl@geo_name, ctrl@group_names)) + drop_cols <- setdiff(names(item_n), c(key(item_n), item_n_vars)) + if (length(drop_cols)) { + item_n[, c(drop_cols) := NULL] + } + + # get mean ystar + item_data[, c("adj_weight") := get(ctrl@weight_name) / get("n_responses")] + item_means <- item_data[, lapply(.SD, function(x) weighted.mean(x, .SD$adj_weight, na.rm = TRUE)), + .SDcols = c(gt_names, "adj_weight"), + by = c(ctrl@geo_name, ctrl@group_names, ctrl@time_name)] + # append _mean to the mean response columns + item_mean_vars <- paste0(gt_names, "_mean") + names(item_means) <- replace(names(item_means), match(gt_names, names(item_means)), item_mean_vars) + data.table::setkeyv(item_means, c(ctrl@time_name, ctrl@geo_name, ctrl@group_names)) + drop_cols <- setdiff(names(item_means), c(key(item_means), item_mean_vars)) + if (length(drop_cols)) { + item_means[, c(drop_cols) := NULL] + } + + # join response counts with means + count_means <- item_n[item_means] + count_means <- count_means[, c(ctrl@time_name, ctrl@geo_name, + ctrl@group_names, item_mean_vars, + item_n_vars), with = FALSE] - # join response counts with means - count_means <- item_n[item_means] - count_means <- count_means[, c(ctrl@time_name, ctrl@geo_name, - ctrl@group_names, item_mean_vars, - item_n_vars), with = FALSE] - - # the group success count for an item is the product of its count and mean - item_s_vars <- paste0(gt_names, "_s_grp") - count_means[, c(item_s_vars) := round(count_means[, (item_mean_vars), with = FALSE] * - count_means[, (item_n_vars), with = FALSE], 0)] - count_means <- count_means[, -grep("_mean$", names(count_means)), with = FALSE] - - - # we want a long table of successes (s_grp) and trials (n_grp) by group and - # item; items need to move from columns to rows - melted <- melt(count_means, id.vars = c(ctrl@time_name, ctrl@geo_name, - ctrl@group_names), - variable.name = "item") - melted[, c("variable") := list(gsub(".*([sn]_grp)$", "\\1", get("item")))] - melted[, c("item") := list(gsub("(.*)_[sn]_grp$", "\\1", get("item")))] - f <- as.formula(paste0(paste(ctrl@time_name, ctrl@geo_name, - paste(ctrl@group_names, collapse = " + "), - "item", sep = "+"), " ~ variable")) - counts <- data.table::dcast.data.table(melted, f, drop = FALSE, fill = 0L) + # the group success count for an item is the product of its count and mean + item_s_vars <- paste0(gt_names, "_s_grp") + count_means[, c(item_s_vars) := round(count_means[, (item_mean_vars), with = FALSE] * + count_means[, (item_n_vars), with = FALSE], 0)] + count_means <- count_means[, -grep("_mean$", names(count_means)), with = FALSE] + + + # we want a long table of successes (s_grp) and trials (n_grp) by group and + # item; items need to move from columns to rows + melted <- melt(count_means, id.vars = c(ctrl@time_name, ctrl@geo_name, + ctrl@group_names), + variable.name = "item") + melted[, c("variable") := list(gsub(".*([sn]_grp)$", "\\1", get("item")))] + melted[, c("item") := list(gsub("(.*)_[sn]_grp$", "\\1", get("item")))] + f <- as.formula(paste0(paste(ctrl@time_name, ctrl@geo_name, + paste(ctrl@group_names, collapse = " + "), + "item", sep = "+"), " ~ variable")) + counts <- data.table::dcast.data.table(melted, f, drop = FALSE, fill = 0L) + } # include aggregates, if any - if (length(aggregate_data) && nrow(aggregate_data) > 0) { + if (length(item_data) && length(aggregate_data) && nrow(aggregate_data) > 0) { + # invariant: we have both individual- and aggregate-level item responses counts <- data.table::rbindlist(list(counts, aggregate_data), use.names = TRUE) message("Added ", length(ctrl@aggregate_item_names), " items from aggregate data.") - data.table::setkeyv(counts, c(ctrl@time_name, "item", ctrl@group_names, - ctrl@geo_name)) + } else if (length(aggregate_data) && nrow(aggregate_data) > 0) { + # invariant: we have only aggregate-level item responses + # aggregate_data is already in the expected format + counts <- aggregate_data + message("Using ", length(ctrl@aggregate_item_names), " items from aggregate data.") + } else if (!length(item_data)) { + # invariant: we unexpectedly have neither individual- nor aggregate-level data + stop("can't proceed with neither item_data nor aggregate_data") } + data.table::setkeyv(counts, c(ctrl@time_name, "item", ctrl@group_names, + ctrl@geo_name)) + # include unobserved cells all_groups = expand.grid(c(setNames(list(unique(counts[[ctrl@geo_name]])), ctrl@geo_name), setNames(list(ctrl@time_filter), ctrl@time_name), diff --git a/R/assertions.r b/R/assertions.r index 4b0dd83..bd24864 100644 --- a/R/assertions.r +++ b/R/assertions.r @@ -41,7 +41,7 @@ has_all_names <- function(table, names, suggestion = NULL) { } assertthat::on_failure(has_all_names) <- function(call, env) { - paste0("not all ", call$names, " are names in ", deparse(call$table)) + paste0("not all of ", deparse(call$names), " are names in ", deparse(call$table)) } all_strings <- function(x) { diff --git a/R/class-control.r b/R/class-control.r index 74e35a6..110275e 100644 --- a/R/class-control.r +++ b/R/class-control.r @@ -1,6 +1,5 @@ setClass("Control", - slots = list(# item data - item_names = "character", + slots = list(item_names = "ANY", time_name = "character", geo_name = "character", group_names = "ANY", @@ -22,13 +21,21 @@ setClass("Control", weight_name = "ANY", proportion_name = "character", rake_names = "character", + max_raked_weight = "ANY", # modeling options - constant_item = "logical"), + constant_item = "logical", + # indicators for state + has_individual_data = "ANY", + has_aggregate_data = "ANY", + has_target_data = "ANY", + has_modifier_data = "ANY"), validity = function(object) { if (!length(object@time_name) == 1L) "\"time_name\" should be a single variable name" else if (!length(object@geo_name) == 1L) "\"geo_name\" should be a single variable name" + else if (length(object@item_names) && !is.character(object@item_names)) + "if specified \"item_names\" should give variable names in a character vector" else if (length(object@survey_name) && length(object@survey_name) != 1L) "if specified \"survey_name\" should be a single variable name" else if (length(object@survey_name) && !is.character(object@survey_name)) @@ -64,14 +71,17 @@ setClass("Control", else if (!length(object@constant_item) == 1L && is.logical(object@constant_item)) "\"constant_item\" should be a single logical value" - # else if (length(unique(object@time_filter)) == 1L) - # "if specified \"time_filter\" should give at least two time periods" else if (length(unique(object@geo_filter)) == 1L) "if specified \"geo_filter\" should give at least two local geographic areas" else if (length(object@min_survey_filter) != 1L || object@min_survey_filter <= 0L) "\"min_survey_filter\" should be a positive integer" else if (!length(object@min_t_filter) == 1L && object@min_t_filter > 0L) "\"min_t_filter\" should be a positive integer" + else if (length(object@max_raked_weight) && + (length(object@max_raked_weight) > 1 | + !is.numeric(object@max_raked_weight))) { + "if specified \"max_raked_weight\" should be a single number" + } else TRUE }) diff --git a/R/class-dgirtin.r b/R/class-dgirtin.r index c0ebfa6..3007250 100644 --- a/R/class-dgirtin.r +++ b/R/class-dgirtin.r @@ -1,7 +1,7 @@ #' Class \code{dgirtIn}: data prepared for modeling with \code{dgirt} #' #' \code{shape} generates objects of class \code{dgirtIn} for modeling with -#' \code{dgirt}. +#' \code{dgirt} and \code{dgmrp}. #' #' @aliases dgirtin-class, get_item_n, get_item_names, get_n, dgirtIn-method, #' print.dgirtIn, @@ -21,8 +21,11 @@ NULL setOldClass("dgirtIn", "R6") dgirtIn <- R6::R6Class("dgirtIn", public = c( + # model objects (N, G, T, ...) and shape objects (item_data, etc.) are + # public and initially NULL setNames(lapply(c(model_objects, shape_objects), function(x) NULL), c(model_objects, shape_objects)), + # the class is instantiated from a Control object initialize = function(ctrl) { if (length(ctrl@constant_item)) { self$constant_item <- ctrl@constant_item @@ -30,29 +33,51 @@ dgirtIn <- R6::R6Class("dgirtIn", self$mod_par_names <- c(ctrl@geo_name, ctrl@time_name) self$unmod_par_names <- ctrl@group_names }, + # the as_list method extracts attributes used in modeling as expected by + # rstan. its arguments will be passed from a dgirt or dgmrp call as_list = function(separate_t, delta_tbar_prior_mean, delta_tbar_prior_sd, innov_sd_delta_scale, innov_sd_theta_scale, hierarchical_model) { + + # model_objects is a character vector of attribute names for rstan data d_in_list <- Map(function(x) self[[x]], private$model_objects) - if (!length(separate_t) == 1L && is.logical(separate_t)) + + # separate_t is a boolean in the stan code + if (length(separate_t) != 1L || !is.logical(separate_t)) { stop("\"separate_t\" should be a single logical value") - else d_in_list$separate_t <- separate_t - if (!length(hierarchical_model) == 1L && is.logical(hierarchical_model)) + } + d_in_list$separate_t <- separate_t + + # hierarchical_model is also boolean in the stan code + if (length(hierarchical_model) != 1L || !is.logical(hierarchical_model)) { stop("\"hierarchical_model\" should be a single logical value") - else d_in_list$hierarchical_model <- hierarchical_model - if (!length(delta_tbar_prior_mean) == 1L && - is.numeric(delta_tbar_prior_mean)) + } + d_in_list$hierarchical_model <- hierarchical_model + + if (length(delta_tbar_prior_mean) != 1L || !is.numeric(delta_tbar_prior_mean)) { stop("\"delta_tbar_prior_mean\" should be a single real value") - else d_in_list$delta_tbar_prior_mean <- delta_tbar_prior_mean - if (!length(delta_tbar_prior_sd) == 1L && is.numeric(delta_tbar_prior_sd)) + } + d_in_list$delta_tbar_prior_mean <- delta_tbar_prior_mean + + if (length(delta_tbar_prior_sd) != 1L || !is.numeric(delta_tbar_prior_sd) + || delta_tbar_prior_sd < 0) + { stop("\"delta_tbar_prior_sd\" should be a single positive real value") - else d_in_list$delta_tbar_prior_sd <- delta_tbar_prior_sd - if (!length(innov_sd_delta_scale ) == 1L && is.numeric(innov_sd_delta_scale)) - stop("\"delta_tbar_delta_scale\" should be a single real value") - else d_in_list$innov_sd_delta_scale <- innov_sd_delta_scale - if (!length(innov_sd_theta_scale ) == 1L && is.numeric(innov_sd_theta_scale)) - stop("\"delta_tbar_theta_scale\" should be a single real value") - else d_in_list$innov_sd_theta_scale <- innov_sd_theta_scale + } + d_in_list$delta_tbar_prior_sd <- delta_tbar_prior_sd + + if (length(innov_sd_delta_scale) != 1L || + !is.numeric(innov_sd_delta_scale) || innov_sd_delta_scale < 0) { + stop("\"innov_sd_delta_scale\" should be a single real value") + } + d_in_list$innov_sd_delta_scale <- innov_sd_delta_scale + + if (length(innov_sd_theta_scale ) != 1L || + !is.numeric(innov_sd_theta_scale) || innov_sd_theta_scale < 0) { + stop("\"innov_sd_theta_scale\" should be a single positive real value") + } + d_in_list$innov_sd_theta_scale <- innov_sd_theta_scale + d_in_list }), - private = list(model_objects = model_objects, - shape_objects = shape_objects)) + # keep track of which items will be used in modeling + private = list(model_objects = model_objects, shape_objects = shape_objects)) diff --git a/R/expand_rownames.r b/R/expand_rownames.r deleted file mode 100644 index 4a458ff..0000000 --- a/R/expand_rownames.r +++ /dev/null @@ -1,65 +0,0 @@ -#' \code{expand_rownames}: expand parameter descriptions in rownames -#' -#' Move rownames that describe parameters (e.g. xi[2009]) to columns. -#' -#' It should rarely be necessary to call \code{expand_rownames} directly. But -#' elements extracted from \code{\link{dgirtfit}}-class objects may have -#' rownames of the format \code{param[group1__groupK,t]} for parameters indexed -#' by group and time period, or \code{param[t]} for parameters indexed by time -#' period. \code{expand_rownames} moves this information to columns whose names -#' are given by the \code{col_names} argument. The rownames in their original -#' format will appear in another column called \code{rn}. -#' -#' @param x A table with rownames in the format \code{param[group1__groupK,t]} -#' or \code{param[t]}. -#' -#' @param time_name A name for any resulting time variable. -#' -#' @param geo_name A name for any resulting geographic variable. -#' -#' @param group_names Names for any resulting group variables. -#' -#' @return \code{x} with additional columns (see details). -#' @seealso \code{\link{dgirtfit-class}} -#' @export -expand_rownames <- function(x, time_name, geo_name, group_names) { - if (is.matrix(x)) x <- as.data.frame(x, stringsAsFactors = FALSE, - rownames = rownames(x)) - x <- data.table::copy(data.table::setDT(x, keep.rownames = TRUE)) - if (!"rn" %in% names(x)) stop("After setDT(x, keep.rownames = TRUE), ", - "rownames couldn't be found. Did x ", - "have rownames?") - indexes <- gsub('.*\\[([A-Za-z0-9,_]+)\\].*', '\\1', x[["rn"]]) - parnames <- gsub('(.*)\\[[A-Za-z0-9,_]+\\].*', '\\1', x[["rn"]]) - comma_split <- data.table::tstrsplit(indexes, c(",")) - for (parname in unique(parnames)) { - # index_names is a list for looking up the names of parameter indexes - for (i in index_names[[parname]]) { - if (length(i)) { - index_pos <- which(index_names[[parname]] == i) - x[parnames == parname, c(i) := - list(comma_split[[index_pos]][parnames == parname])] - } - } - } - if ("group_names" %in% names(x)) { - us_split <- strsplit(x[["group_names"]], "__", fixed = TRUE) - n_col <- max(vapply(us_split, length, integer(1L))) - group_cols <- paste0("group_", seq.int(0, n_col - 1L)) - x[, c(group_cols) := data.table::tstrsplit(group_names, "__", fixed = TRUE)] - if (length(geo_name)) { - x[, c(geo_name, group_cols[1L]) := list(get(group_cols[1L]), NULL)] - } - if (length(group_names)) { - x[, c(group_names, group_cols[-1L]) := list(get(group_cols[-1L]), NULL)] - } - x[, c("group_names") := NULL] - } - if (length(time_name) && "time_name" %in% names(x)) { - names(x)[names(x) == "time_name"] <- time_name - if (is.character(x[[time_name]])) { - x[, c(time_name) := type.convert(x[[time_name]])] - } - } - x -} diff --git a/R/methods-control.r b/R/methods-control.r index aa102b6..871c8ce 100644 --- a/R/methods-control.r +++ b/R/methods-control.r @@ -23,10 +23,10 @@ init_control <- function(item_data = item_data, raking = raking, weight_name = weight_name, proportion_name = proportion_name, + max_raked_weight = max_raked_weight, constant_item = constant_item, ...) { ctrl <- new("Control", - # item data item_names = item_names, time_name = time_name, geo_name = geo_name, @@ -48,12 +48,17 @@ init_control <- function(item_data = item_data, raking = raking, weight_name = weight_name, proportion_name = proportion_name, + max_raked_weight = max_raked_weight, # modeling options constant_item = constant_item, ...) - is_item_name <- valid_names(item_data, ctrl, 1L) - is_item_name(c("time_name", "geo_name")) - has_type(c("time_name", "geo_name"), item_data, ctrl) + + if (length(item_data)) { + is_item_name <- valid_names(item_data, ctrl, 1L) + is_item_name(c("time_name", "geo_name")) + has_type(c("time_name", "geo_name"), item_data, ctrl) + } + if (length(aggregate_data)) { is_agg_name <- valid_names(aggregate_data, ctrl, 1L) is_agg_name(c("time_name", "geo_name")) @@ -65,6 +70,10 @@ init_control <- function(item_data = item_data, } } + if (!length(item_data) & !length(aggregate_data)) { + stop("either \"item_data\" or \"aggregate_data\" must be specified") + } + if (length(ctrl@modifier_names)) { if (!length(ctrl@t1_modifier_names)) { ctrl@t1_modifier_names <- ctrl@modifier_names @@ -72,7 +81,9 @@ init_control <- function(item_data = item_data, } if (!length(ctrl@time_filter)) { - ctrl@time_filter <- sort(unique(item_data[[ctrl@time_name]])) + if (length(item_data)) { + ctrl@time_filter <- sort(unique(item_data[[ctrl@time_name]])) + } if (length(aggregate_data)) { ctrl@time_filter <- sort(unique(c(ctrl@time_filter, aggregate_data[[ctrl@time_name]]))) @@ -80,7 +91,9 @@ init_control <- function(item_data = item_data, } if (!length(ctrl@geo_filter)) { - ctrl@geo_filter <- sort(unique(as.character(item_data[[ctrl@geo_name]]))) + if (length(item_data)) { + ctrl@geo_filter <- sort(unique(as.character(item_data[[ctrl@geo_name]]))) + } if (length(aggregate_data)) { ctrl@geo_filter <- sort(unique(c(ctrl@geo_filter, aggregate_data[[ctrl@geo_name]]))) @@ -94,5 +107,11 @@ init_control <- function(item_data = item_data, ctrl@rake_names = all.vars(ctrl@raking) } } + + ctrl@has_individual_data = ifelse(length(item_data), TRUE, FALSE) + ctrl@has_aggregate_data = ifelse(length(aggregate_data), TRUE, FALSE) + ctrl@has_modifier_data = ifelse(length(modifier_data), TRUE, FALSE) + ctrl@has_target_data = ifelse(length(target_data), TRUE, FALSE) + ctrl } diff --git a/R/methods-dgirtfit-poststratify.r b/R/methods-dgirtfit-poststratify.r index c7ad422..2569471 100644 --- a/R/methods-dgirtfit-poststratify.r +++ b/R/methods-dgirtfit-poststratify.r @@ -16,7 +16,7 @@ utils::globalVariables(c("value", "scaled_prop")) #' @param ... Additional arguments to methods. setGeneric("poststratify", signature = "x", function(x, target_data, strata_names, aggregated_names, - proportion_name = "proportion", ...) + proportion_name = "proportion", ...) standardGeneric("poststratify")) #' @param pars Selected parameter names. @@ -58,12 +58,11 @@ setMethod("poststratify", c("dgo_fit"), #' @export setMethod("poststratify", "data.frame", function(x, target_data, strata_names, aggregated_names, - proportion_name = "proportion", pars = "theta_bar") { + proportion_name = "proportion") { assert(is.data.frame(target_data)) assert(all_strings(strata_names)) - assert(all_strings(strata_names)) + assert(all_strings(aggregated_names)) assert(assertthat::is.string(proportion_name)) - assert(all_strings(pars)) if (anyDuplicated(c(strata_names, aggregated_names))) { stop("Variable names cannot be used more than once across ", @@ -112,39 +111,38 @@ setMethod("poststratify", "data.frame", targets[, c(extra_cols) := NULL] } - x_n <- nrow(x) - props <- merge(x, targets, all = FALSE, by = c(strata_names, - aggregated_names)) - if (!identical(x_n, nrow(props))) { - warning("Not all estimates could be matched with a population proportion ", - "using the stratifying and grouping variables. ", x_n - - nrow(props), " will be dropped from the output, ", - "and this may indicate a larger problem.") + for (varname in c(strata_names, aggregated_names)) { + check_target_levels(varname, x, targets) } - props <- scale_props(props, proportion_name, strata_names) - - check_proportions(props, strata_names) - res <- props[, list(value = sum(value * scaled_prop)), by = strata_names] + props <- merge(x, targets, all = FALSE, by = c(strata_names, + aggregated_names)) + by_vars = c(strata_names, 'iteration') + if (!'iteration' %in% names(props)) { + props[, iteration := 1] + no_iterations <- TRUE + } else { + no_iterations <- FALSE + } + props <- scale_props(props, proportion_name, by_vars) + check_proportions(props, by_vars) + res <- props[, list(value = sum(value * scaled_prop)), by = by_vars] + if (no_iterations) { + res[, iteration := NULL] + } res[] }) -check_estimates <- function(estimates, strata_names) { - estimates[, lapply(.SD, sum), by = c(strata_names)] - estimates -} - -scale_props <- function(props, proportion_name, strata_names) { - strata_sums <- props[, list(strata_sum = sum(get(proportion_name))), - by = strata_names] - props <- merge(props, strata_sums, all = FALSE, by = strata_names) +scale_props <- function(props, proportion_name, by_vars) { + strata_sums <- props[, list(strata_sum = sum(get(proportion_name))), by = + by_vars] + props <- merge(props, strata_sums, all = FALSE, by = by_vars) props[, c("scaled_prop") := get(proportion_name) / get("strata_sum")] return(props) } -check_proportions <- function(tabular, strata_names) { - prop_sums <- tabular[, lapply(.SD, sum), .SDcols = "scaled_prop", - by = strata_names] +check_proportions <- function(tabular, by_vars) { + prop_sums <- tabular[, lapply(.SD, sum), .SDcols = "scaled_prop", by = by_vars] if (!isTRUE(all.equal(rep(1L, nrow(prop_sums)), prop_sums$scaled_prop))) { stop("Not all proportions sum to 1 within stratifying variables even ", " though they should have been rescaled. (The mean sum is ", @@ -156,13 +154,13 @@ check_proportions <- function(tabular, strata_names) { check_target_levels <- function(variable, x, targets) { if (!identical(class(x[[variable]]), class(targets[[variable]]))) { stop("'", variable, "' inherits from '", class(x[[variable]]), - "' in x and '", class(targets[[variable]]), "' in targets.", - "Please reconcile the types.") + "' in estimates and '", class(targets[[variable]]), "' in ", + "targets. Please reconcile the types.") } else if (!all(x[[variable]] %in% targets[[variable]])) { x_levels <- setdiff(x[[variable]], targets[[variable]]) stop("Not all levels of '", variable, "' in estimates are levels of '", variable, "' in targets. Missing: ", paste(x_levels , collapse = ", "), - "missing. The target data should give the population proportion of each - ", "group represented in the estimates.") + ". The target data should give the population proportion of ", + "each group represented in the estimates.") } else TRUE } diff --git a/R/restrict_input_data.r b/R/restrict_input_data.r index da2ab82..a509b14 100644 --- a/R/restrict_input_data.r +++ b/R/restrict_input_data.r @@ -1,45 +1,47 @@ restrict_items <- function(item_data, ctrl) { - extra_colnames <- setdiff(names(item_data), - c(ctrl@item_names, - ctrl@survey_name, - ctrl@geo_name, - ctrl@time_name, - ctrl@group_names, - ctrl@weight_name, - ctrl@rake_names, - ctrl@id_vars - )) - if (length(extra_colnames)) { - item_data[, c(extra_colnames) := NULL] - } - coerce_factors(item_data, c(ctrl@group_names, ctrl@geo_name, - ctrl@survey_name)) - rename_numerics(item_data, c(ctrl@group_names, ctrl@geo_name, - ctrl@survey_name)) - initial_dim <- dim(item_data) - final_dim <- c() - iter <- 1L - while (!identical(initial_dim, final_dim)) { - message("Applying restrictions, pass ", iter, "...") - if (iter == 1L) { - item_data <- drop_rows_missing_covariates(item_data, ctrl) - item_data <- keep_t(item_data, ctrl) - item_data <- keep_geo(item_data, ctrl) + if (length(item_data)) { + extra_colnames <- setdiff(names(item_data), + c(ctrl@item_names, + ctrl@survey_name, + ctrl@geo_name, + ctrl@time_name, + ctrl@group_names, + ctrl@weight_name, + ctrl@rake_names, + ctrl@id_vars + )) + if (length(extra_colnames)) { + item_data[, c(extra_colnames) := NULL] } + coerce_factors(item_data, c(ctrl@group_names, ctrl@geo_name, + ctrl@survey_name)) + rename_numerics(item_data, c(ctrl@group_names, ctrl@geo_name, + ctrl@survey_name)) initial_dim <- dim(item_data) - drop_responseless_items(item_data, ctrl) - drop_items_rare_in_time(item_data, ctrl) - if (length(ctrl@survey_name)) { - drop_items_rare_in_polls(item_data, ctrl) + final_dim <- c() + iter <- 1L + while (!identical(initial_dim, final_dim)) { + message("Applying restrictions, pass ", iter, "...") + if (iter == 1L) { + item_data <- drop_rows_missing_covariates(item_data, ctrl) + item_data <- keep_t(item_data, ctrl) + item_data <- keep_geo(item_data, ctrl) + } + initial_dim <- dim(item_data) + drop_responseless_items(item_data, ctrl) + drop_items_rare_in_time(item_data, ctrl) + if (length(ctrl@survey_name)) { + drop_items_rare_in_polls(item_data, ctrl) + } + item_data <- drop_itemless_respondents(item_data, ctrl) + final_dim <- dim(item_data) + iter <- iter + 1L + if (identical(initial_dim, final_dim)) { + message("\tNo changes") + } } - item_data <- drop_itemless_respondents(item_data, ctrl) - final_dim <- dim(item_data) - iter <- iter + 1L - if (identical(initial_dim, final_dim)) { - message("\tNo changes") - } + setkeyv(item_data, c(ctrl@geo_name, ctrl@time_name)) } - setkeyv(item_data, c(ctrl@geo_name, ctrl@time_name)) invisible(item_data) } @@ -287,12 +289,6 @@ drop_items_rare_in_polls <- function(item_data, ctrl) { invisible(item_data) } -get_observed <- function(item_data, aggregate_data, varname) { - obs <- Map(unique.data.frame, list(item_data[, varname, with = FALSE], - aggregate_data[, varname, with = FALSE])) - sort.default(unique.default(unname(unlist(obs)))) -} - stop_if_any_na <- function(where, varnames) { # If there are NA values in any variable named in 'varnames', in the dataframe # given by 'where', stop diff --git a/R/reweight_item_responses.r b/R/reweight_item_responses.r index 3dc2a51..ff2f1e2 100644 --- a/R/reweight_item_responses.r +++ b/R/reweight_item_responses.r @@ -1,9 +1,15 @@ -utils::globalVariables(c("raked_weight")) +utils::globalVariables(c("raked_weight", "preweight")) weight <- function(item_data, target_data, control) { # Adjust individual survey weights given population targets item_data[, c("preweight") := rake_weights(item_data, target_data, control)] + + if (length(control@max_raked_weight)) { + item_data[preweight > control@max_raked_weight, preweight := + control@max_raked_weight] + } + item_data[, c("raked_weight") := list(get("preweight") / mean(get("preweight"), na.rm = TRUE)), by = eval(control@time_name)] @@ -43,3 +49,4 @@ rake_weights <- function(item_data, target_data, control) { } return(raked_weights) } + diff --git a/R/shape.r b/R/shape.r index 219f406..eacfe9b 100644 --- a/R/shape.r +++ b/R/shape.r @@ -39,11 +39,15 @@ #' @section Reweighting: #' Use argument \code{target_data} to adjust the weighting of groups toward #' population targets via raking, using an adaptation of -#' \code{\link[survey]{rake}}. To adjust individual survey weights in +#' \code{\link[survey]{rake}}. To adjust existing survey weights in #' \code{item_data}, provide argument \code{weight_name}. Otherwise, #' observations in \code{item_data} will be assigned equal starting weights. -#' Argument \code{raking} defines strata. Argument \code{proportion_name} -#' is optional. +#' Argument \code{raking} defines strata. If you pass it a list of formulas like +#' \code{list(~ x, ~ y)}, raking is first over \code{x}, then over \code{y}. +#' Given an additive formula like \code{~ x + y}, raking is over the +#' combinations of \code{x} and \code{y}. So, \code{list(~ x, ~ y + z)} is first +#' over \code{x}, then over \code{y}-\code{z} pairs. Argument +#' \code{proportion_name} is optional. #' #' @section Restrictions: #' For convenience, data in \code{item_data}, \code{modifier_data}, @@ -85,6 +89,10 @@ #' @param raking A formula or list of formulas specifying the variables on which #' to rake survey weights. #' +#' @param max_raked_weight A maximum over which raked weights will be trimmed. +#' Only applied after raking. To trim unraked weights, manipulate the input data +#' directly. +#' #' @param aggregate_data A table of trial and success counts by group and item. #' See details below. #' @@ -141,8 +149,8 @@ #' get_item_n(shaped_responses, by = "year") #' #' @export -shape <- function(item_data, - item_names, +shape <- function(item_data = NULL, + item_names = NULL, time_name, geo_name, group_names = NULL, @@ -158,6 +166,7 @@ shape <- function(item_data, standardize = TRUE, target_data = NULL, raking = NULL, + max_raked_weight = NULL, weight_name = NULL, proportion_name = "proportion", aggregate_data = NULL, @@ -187,6 +196,7 @@ shape <- function(item_data, raking = raking, weight_name = weight_name, proportion_name = proportion_name, + max_raked_weight = max_raked_weight, constant_item = constant_item, ...) @@ -210,7 +220,7 @@ shape <- function(item_data, aggregate_data$item) # rake survey weights # - if (length(target_data)) { + if (length(target_data) & length(item_data)) { item_data <- weight(item_data, target_data, ctrl) ctrl@weight_name <- "raked_weight" } @@ -241,7 +251,9 @@ init_dgirt_in <- function(item_data, aggregate_data, modifier_data, target_data, d_in <- dgirtIn$new(ctrl) # aggregate individual item response data to group level # - item_data <- dichotomize(item_data, ctrl) + if (length(item_data)) { + item_data <- dichotomize(item_data, ctrl) + } d_in$group_grid <- make_group_grid(item_data, aggregate_data, ctrl) d_in$group_grid_t <- make_group_grid_t(d_in$group_grid, ctrl) d_in$group_counts <- make_group_counts(item_data, aggregate_data, ctrl) diff --git a/R/validate_input_data.r b/R/validate_input_data.r index 1682334..3bdc376 100644 --- a/R/validate_input_data.r +++ b/R/validate_input_data.r @@ -62,31 +62,33 @@ check_modifiers <- function(modifier_data, ctrl) { } check_item <- function(item_data, ctrl) { - is_name <- valid_names(item_data, ctrl, 1L) - is_name(c("time_name", "geo_name")) - are_names <- valid_names(item_data, ctrl) - are_names("item_names") - if (length(ctrl@id_vars)) { - are_names("id_vars") - } - for (varname in c("weight_name", "survey_name")) { - if (length(slot(ctrl, varname))) { - is_name(varname) - has_type(varname, item_data, ctrl) + if (length(item_data)) { + is_name <- valid_names(item_data, ctrl, 1L) + is_name(c("time_name", "geo_name")) + are_names <- valid_names(item_data, ctrl) + are_names("item_names") + if (length(ctrl@id_vars)) { + are_names("id_vars") + } + for (varname in c("weight_name", "survey_name")) { + if (length(slot(ctrl, varname))) { + is_name(varname) + has_type(varname, item_data, ctrl) + } + } + has_type(c("time_name", "geo_name", "group_names"), item_data, ctrl) + check_time(item_data, ctrl@time_name) + if (is.list(ctrl@raking)) { + raking = unlist(lapply(ctrl@raking, all.vars)) + } else { + raking = all.vars(ctrl@raking) + } + are_valid_terms <- valid_names(item_data, len = 1, stub = "is a raking formula term but isn't") + are_valid_terms(raking) + for (name in c(ctrl@group_names, ctrl@geo_name)) { + if (!length(unique(item_data[[name]])) > 1) + stop(name, " doesn't vary in item_data") } - } - has_type(c("time_name", "geo_name", "group_names"), item_data, ctrl) - check_time(item_data, ctrl@time_name) - if (is.list(ctrl@raking)) { - raking = unlist(lapply(ctrl@raking, all.vars)) - } else { - raking = all.vars(ctrl@raking) - } - are_valid_terms <- valid_names(item_data, len = 1, stub = "is a raking formula term but isn't") - are_valid_terms(raking) - for (name in c(ctrl@group_names, ctrl@geo_name)) { - if (!length(unique(item_data[[name]])) > 1) - stop(name, " doesn't vary in item_data") } } diff --git a/README.Rmd b/README.Rmd index 436e6b8..88dc94c 100644 --- a/README.Rmd +++ b/README.Rmd @@ -44,7 +44,7 @@ knitr::opts_chunk$set( # Installation -dgo can be installed from [CRAN](https://cran.r-project.org/web/packages/dgo/index.html): +dgo can be installed from [CRAN](https://CRAN.R-project.org/package=dgo): ```{r, eval = FALSE} install.packages("dgo") diff --git a/README.md b/README.md index 9b28ca9..aee9baf 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ This model opens up new areas of research on historical public opinion in the Un Installation ============ -dgo can be installed from [CRAN](https://cran.r-project.org/web/packages/dgo/index.html): +dgo can be installed from [CRAN](https://CRAN.R-project.org/package=dgo): ``` r install.packages("dgo") @@ -35,6 +35,14 @@ Load the package and set RStan's recommended options for a local, multicore mach ``` r library(dgo) +#> Loading required package: dgodata +#> Loading required package: rstan +#> Loading required package: ggplot2 +#> Loading required package: StanHeaders +#> rstan (Version 2.16.2, packaged: 2017-07-03 09:24:58 UTC, GitRev: 2e1f913d3ca3) +#> For execution on a local, multicore CPU with excess RAM we recommend calling +#> rstan_options(auto_write = TRUE) +#> options(mc.cores = parallel::detectCores()) rstan_options(auto_write = TRUE) options(mc.cores = parallel::detectCores()) ``` diff --git a/_pkgdown.yml b/_pkgdown.yml index 8b140f4..13d40c9 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -25,7 +25,6 @@ reference: - poststratify - plot_rhats - dgirt_plot - - expand_rownames navbar: left: diff --git a/appveyor.yml b/appveyor.yml index 2ead4c1..b8495f7 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -19,19 +19,11 @@ environment: USE_RTOOLS: true matrix: - - R_VERSION: devel - GCC_PATH: mingw_32 - - - R_VERSION: devel - R_ARCH: x64 - GCC_PATH: mingw_64 - - R_VERSION: release R_ARCH: x64 - - R_VERSION: oldrel - RTOOLS_VERSION: 32 - CRAN: http://cran.rstudio.com + - R_VERSION: release + R_ARCH: i386 build_script: - travis-tool.sh install_deps diff --git a/data/toy_dgirt_in.rda b/data/toy_dgirt_in.rda index 6f91c2f..a70e44e 100644 Binary files a/data/toy_dgirt_in.rda and b/data/toy_dgirt_in.rda differ diff --git a/data/toy_dgirtfit.rda b/data/toy_dgirtfit.rda index db35c41..f310ddc 100644 Binary files a/data/toy_dgirtfit.rda and b/data/toy_dgirtfit.rda differ diff --git a/docs/articles/abortion_attitudes.html b/docs/articles/abortion_attitudes.html index 9bfbb4d..2ead0c7 100644 --- a/docs/articles/abortion_attitudes.html +++ b/docs/articles/abortion_attitudes.html @@ -66,11 +66,11 @@

Abortion Attitudes

-

This vignette demonstrates estimation of public attitudes toward abortion from responses to a single survey item, using the dynamic multi-level regression and post-stratification (MRP) model implemented in dgmrp().

+

This vignette demonstrates estimation of public attitudes toward abortion from responses to a single survey item, using the dynamic multi-level regression and post-stratification (MRP) model implemented in dgmrp().

Prepare input data

-

shape() prepares input data for use with the modeling functions dgirt() and dgmrp(). Here we use the included opinion dataset.

+

shape() prepares input data for use with the modeling functions dgirt() and dgmrp(). Here we use the included opinion dataset.

dgirt_in_abortion <- shape(opinion, item_names = "abortion", time_name = "year",
   geo_name = "state", group_names = "race3", geo_filter = c("CA", "GA", "LA",
     "MA"), id_vars = "source")
@@ -110,14 +110,14 @@ 

#> Constants: #> Q T P N G H D #> 1 5 5 60 12 1 1

-

get_n() and get_item_n() give response counts.

-
get_n(dgirt_in_abortion, by = "state")
+

get_n() and get_item_n() give response counts.

+
get_n(dgirt_in_abortion, by = "state")
 #>    state     n
 #> 1:    CA 14248
 #> 2:    GA  4547
 #> 3:    LA  1658
 #> 4:    MA  2554
-get_item_n(dgirt_in_abortion, by = "year")
+get_item_n(dgirt_in_abortion, by = "year")
 #>    year abortion
 #> 1: 2006     5275
 #> 2: 2007     1690
@@ -128,9 +128,9 @@ 

Fit a model

-

dgmrp() fits a dynamic multi-level regression and post-stratification (MRP) model to data processed by shape(). Here, we’ll use it to estimate public attitudes toward abortion over time, for the groups defined by state and race3. (Specifically, by their Cartesian product.)

-

Under the hood, dgmrp() uses RStan for MCMC sampling, and arguments can be passed to RStan’s stan() via the ... argument of dgmrp(). This is almost always desirable. Here, we specify the number of sampler iterations, chains, and cores.

-
dgmrp_out_abortion <- dgmrp(dgirt_in_abortion, iter = 1500, chains = 4, cores =
+

dgmrp() fits a dynamic multi-level regression and post-stratification (MRP) model to data processed by shape(). Here, we’ll use it to estimate public attitudes toward abortion over time, for the groups defined by state and race3. (Specifically, by their Cartesian product.)

+

Under the hood, dgmrp() uses RStan for MCMC sampling, and arguments can be passed to RStan’s stan() via the ... argument of dgmrp(). This is almost always desirable. Here, we specify the number of sampler iterations, chains, and cores.

+
dgmrp_out_abortion <- dgmrp(dgirt_in_abortion, iter = 1500, chains = 4, cores =
   4, seed = 42)

The model results are held in a dgmrp_fit object. Methods from RStan like extract() are available if needed because dgmrp_fit is a subclass of stanfit. But dgo provides its own methods for typical post-estimation tasks.

@@ -240,7 +240,8 @@

-

dgmrp returns a fitted model object of class dgmrp_fit, +

dgmrp returns a fitted model object of class dgmrp_fit, which inherits from dgo_fit.

diff --git a/docs/reference/dgo.html b/docs/reference/dgo.html index 90339ec..160f977 100644 --- a/docs/reference/dgo.html +++ b/docs/reference/dgo.html @@ -23,7 +23,8 @@ - + + diff --git a/docs/reference/dgo_fit-class.html b/docs/reference/dgo_fit-class.html index 45420e0..5c33b9a 100644 --- a/docs/reference/dgo_fit-class.html +++ b/docs/reference/dgo_fit-class.html @@ -23,7 +23,8 @@ - + + diff --git a/docs/reference/dgo_fit-methods.html b/docs/reference/dgo_fit-methods.html index 1cf5510..c9499f9 100644 --- a/docs/reference/dgo_fit-methods.html +++ b/docs/reference/dgo_fit-methods.html @@ -23,7 +23,8 @@ - + + @@ -175,58 +176,58 @@

Value

Examples

data(toy_dgirtfit) -summarize(toy_dgirtfit)
#> param state race3 year mean sd median q_025 -#> 1: theta_bar SC black 2009 2.581303889 2.0773702 2.025744919 0.4663683 -#> 2: theta_bar SC black 2010 1.332893651 0.9342120 1.087193518 0.3108696 -#> 3: theta_bar SC other 2009 -1.474896258 1.5187934 -1.073520925 -5.1898485 -#> 4: theta_bar SC other 2010 0.002222642 0.8380635 -0.001426992 -1.7762351 -#> 5: theta_bar SC white 2009 -1.862723209 1.5588242 -1.463595482 -5.9613244 -#> 6: theta_bar SC white 2010 -1.563808392 1.1325962 -1.296122850 -4.4077860 -#> 7: theta_bar VA black 2009 2.305825333 1.9368960 1.756194045 0.4316253 -#> 8: theta_bar VA black 2010 1.637632813 1.1228341 1.317204830 0.4178312 -#> 9: theta_bar VA other 2009 -1.095120475 1.2038704 -0.784426840 -4.2372739 -#> 10: theta_bar VA other 2010 0.300496502 0.6713402 0.227488241 -0.7974740 -#> 11: theta_bar VA white 2009 -1.078641503 0.9660087 -0.836360911 -3.8903031 -#> 12: theta_bar VA white 2010 -0.887081709 0.6906443 -0.705485848 -2.6783883 +summarize(toy_dgirtfit)
#> param state race3 year mean sd median q_025 +#> 1: theta_bar SC black 2009 2.24559756 1.7897209 1.84180053 0.4522434 +#> 2: theta_bar SC black 2010 1.15755259 0.9116460 0.88343031 0.2488049 +#> 3: theta_bar SC other 2009 -1.29813621 1.2173550 -0.99560220 -4.3745255 +#> 4: theta_bar SC other 2010 0.03000299 0.6902895 0.06824939 -1.4082368 +#> 5: theta_bar SC white 2009 -1.59792561 1.2283236 -1.31250037 -4.7712845 +#> 6: theta_bar SC white 2010 -1.29950834 1.0557300 -1.01959622 -4.0458618 +#> 7: theta_bar VA black 2009 2.02881527 1.4012611 1.67166246 0.4524494 +#> 8: theta_bar VA black 2010 1.40617435 1.0288018 1.11423319 0.3391811 +#> 9: theta_bar VA other 2009 -0.93038106 1.0475514 -0.69822446 -3.4956185 +#> 10: theta_bar VA other 2010 0.27922913 0.5149350 0.21708687 -0.6543708 +#> 11: theta_bar VA white 2009 -0.91333932 0.7394027 -0.72245960 -2.7489035 +#> 12: theta_bar VA white 2010 -0.71577875 0.6234497 -0.55060586 -2.3174531 #> q_975 -#> 1: 8.70794851 -#> 2: 3.74446230 -#> 3: 0.01442099 -#> 4: 1.58650903 -#> 5: -0.26197625 -#> 6: -0.25046472 -#> 7: 7.49825382 -#> 8: 4.62354118 -#> 9: 0.24311882 -#> 10: 1.79960198 -#> 11: -0.12456895 -#> 12: -0.08468892
data(toy_dgirtfit) +#> 1: 6.80052090 +#> 2: 3.52798518 +#> 3: 0.02768110 +#> 4: 1.39749747 +#> 5: -0.28580519 +#> 6: -0.19539782 +#> 7: 5.46631775 +#> 8: 4.35076456 +#> 9: 0.35390364 +#> 10: 1.43108820 +#> 11: -0.11261952 +#> 12: -0.06558131
data(toy_dgirtfit) # access posterior samples as.data.frame(toy_dgirtfit, pars = 'theta_bar')
#> param state race3 year iteration value -#> 1: theta_bar SC black 2009 1 1.0253959 -#> 2: theta_bar SC black 2009 2 0.8703493 -#> 3: theta_bar SC black 2009 3 2.5546916 -#> 4: theta_bar SC black 2009 4 2.8851330 -#> 5: theta_bar SC black 2009 5 1.6683215 +#> 1: theta_bar SC black 2009 1 2.3585309 +#> 2: theta_bar SC black 2009 2 3.7881672 +#> 3: theta_bar SC black 2009 3 2.5182470 +#> 4: theta_bar SC black 2009 4 4.9504883 +#> 5: theta_bar SC black 2009 5 0.5702558 #> --- -#> 9596: theta_bar VA white 2010 796 -0.9399185 -#> 9597: theta_bar VA white 2010 797 -0.8616181 -#> 9598: theta_bar VA white 2010 798 -0.5838702 -#> 9599: theta_bar VA white 2010 799 -0.6810680 -#> 9600: theta_bar VA white 2010 800 -0.2927825
data(toy_dgirtfit) -rhats(toy_dgirtfit)
#> param state race3 year Rhat -#> 1: theta_bar SC black 2009 1.012451 -#> 2: theta_bar VA black 2009 1.017972 -#> 3: theta_bar SC other 2009 1.007918 -#> 4: theta_bar VA other 2009 1.009136 -#> 5: theta_bar SC white 2009 1.011481 -#> 6: theta_bar VA white 2009 1.013624 -#> 7: theta_bar SC black 2010 1.005082 -#> 8: theta_bar VA black 2010 1.008074 -#> 9: theta_bar SC other 2010 1.000072 -#> 10: theta_bar VA other 2010 0.998458 -#> 11: theta_bar SC white 2010 1.009116 -#> 12: theta_bar VA white 2010 1.011247
+#> 9596: theta_bar VA white 2010 796 -0.5245107 +#> 9597: theta_bar VA white 2010 797 -1.4022078 +#> 9598: theta_bar VA white 2010 798 -1.0872082 +#> 9599: theta_bar VA white 2010 799 -0.9778072 +#> 9600: theta_bar VA white 2010 800 -0.9032673
data(toy_dgirtfit) +rhats(toy_dgirtfit)
#> param state race3 year Rhat +#> 1: theta_bar SC black 2009 0.9988403 +#> 2: theta_bar VA black 2009 1.0058328 +#> 3: theta_bar SC other 2009 1.0046187 +#> 4: theta_bar VA other 2009 1.0049422 +#> 5: theta_bar SC white 2009 1.0019199 +#> 6: theta_bar VA white 2009 1.0058461 +#> 7: theta_bar SC black 2010 1.0042491 +#> 8: theta_bar VA black 2010 1.0061799 +#> 9: theta_bar SC other 2010 1.0011256 +#> 10: theta_bar VA other 2010 1.0001827 +#> 11: theta_bar SC white 2010 1.0097541 +#> 12: theta_bar VA white 2010 1.0048440