Skip to content

Commit

Permalink
0.2.11
Browse files Browse the repository at this point in the history
  • Loading branch information
jamesdunham authored Oct 28, 2017
2 parents 2df18b4 + d0eecbc commit 367ade3
Show file tree
Hide file tree
Showing 63 changed files with 1,293 additions and 765 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@
^readme-cache/
^vignettes/
^wercker\.yml$
^_pkgdown\.yml$
17 changes: 0 additions & 17 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,6 @@ cache: packages
matrix:
include:

- os: linux
dist: precise
r: release
- os: linux
dist: precise
r: devel
- os: linux
dist: precise
r: oldrel

- os: linux
dist: trusty
r: release
Expand All @@ -30,10 +20,6 @@ matrix:
osx_image: xcode8.3
latex: false
r: release
# - os: osx
# osx_image: xcode8.3
# latex: false
# r: devel
- os: osx
osx_image: xcode8.3
latex: false
Expand All @@ -43,9 +29,6 @@ matrix:
- os: osx
latex: false
r: release
# - os: osx
# latex: false
# r: devel
- os: osx
latex: false
r: oldrel
Expand Down
5 changes: 2 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: dgo
Title: Dynamic Estimation of Group-Level Opinion
Version: 0.2.10
Date: 2017-05-29
Version: 0.2.11
Date: 2017-10-26
Description: Fit dynamic group-level IRT and MRP models from individual or
aggregated item response data. This package handles common preprocessing
tasks and extends functions for inspecting results, poststratification, and
Expand Down Expand Up @@ -43,7 +43,6 @@ Collate:
'class-dgmrp_fit.r'
'dgirt.r'
'dichotomize_item_responses.r'
'expand_rownames.r'
'methods-control.r'
'methods-dgirtfit-plot.r'
'methods-dgirtfit-poststratify.r'
Expand Down
13 changes: 6 additions & 7 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -25,18 +25,17 @@ build-cran:
$(R) CMD build . --no-resave-data --no-manual

check:
$(R) CMD check $(PKG)_$(VERSION).tar.gz
$(R) CMD check $(BINARY)

check-cran:
$(R) CMD check --as-cran $(PKG)_$(VERSION).tar.gz
$(R) CMD check --as-cran $(BINARY)

check-quick $(PKG)_$(VERSION).tar.gz:
check-quick $(BINARY):
$(R) $(R_ARGS) CMD build .
$(R) CMD check $(PKG)_$(VERSION).tar.gz
$(R) CMD check $(BINARY)

install: $(PKG)_$(VERSION).tar.gz
$(R) CMD INSTALL --no-multiarch --with-keep.source \
$(PKG)_$(VERSION).tar.gz
install: $(BINARY)
$(R) CMD INSTALL --no-multiarch --with-keep.source $(BINARY)

install-code:
$(R) CMD INSTALL --no-multiarch --with-keep.source --no-docs .
Expand Down
1 change: 0 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
S3method(as.data.frame,dgo_fit)
export(dgirt)
export(dgmrp)
export(expand_rownames)
export(plot_rhats)
export(shape)
export(summarize)
Expand Down
12 changes: 12 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
## dgo 0.2.11

* Add poststratification over posterior samples (closes #21).
* `shape()` now accepts aggregated item response data unaccompanied by
individual-level item response data. The `item_data` and `item_names`
arguments are no longer required.
* Add a `max_raked_weight` argument to `shape()` for trimming raked weights.
Note that trimming occurs before raked weights are rescaled to have mean 1,
and the rescaled weights can be larger than `max_raked_weight`.
* Remove the unused function `expand_rownames()`.
* Bugfixes.

## dgo 0.2.10

* Remove Rcpp dependency by rewriting `dichotomize()` in R.
Expand Down
138 changes: 75 additions & 63 deletions R/aggregate_item_responses.r
Original file line number Diff line number Diff line change
Expand Up @@ -23,78 +23,90 @@ make_group_counts <- function(item_data, aggregate_data, ctrl) {
# Because of how DGIRT Stan code iterates over the data, the result must be
# ordered by time, item, and then group. The order of the grouping variables
# doesn't matter.
gt_names <- attr(item_data, "gt_items")
item_data[, c("n_responses") := list(rowSums(!is.na(.SD))),
.SDcols = gt_names]
if (!length(ctrl@weight_name)) {
item_data[, weight := 1L]
ctrl@weight_name <- "weight"
}
item_data[, c("def") := lapply(.SD, calc_design_effects),
.SDcols = ctrl@weight_name,
by = c(ctrl@geo_name, ctrl@group_names, ctrl@time_name)]

# get design-effect-adjusted nonmissing response counts by group and item
item_n <- item_data[, lapply(.SD, count_items, get("n_responses"), get("def")),
.SDcols = c(gt_names),
by = c(ctrl@geo_name, ctrl@group_names, ctrl@time_name)]
# append _n_grp to the response count columns
item_n_vars <- paste0(gt_names, "_n_grp")
names(item_n) <- replace(names(item_n), match(gt_names, names(item_n)), item_n_vars)
data.table::setkeyv(item_n, c(ctrl@time_name, ctrl@geo_name, ctrl@group_names))
drop_cols <- setdiff(names(item_n), c(key(item_n), item_n_vars))
if (length(drop_cols)) {
item_n[, c(drop_cols) := NULL]
}
if (length(item_data)) {
gt_names <- attr(item_data, "gt_items")
item_data[, c("n_responses") := list(rowSums(!is.na(.SD))),
.SDcols = gt_names]
if (!length(ctrl@weight_name)) {
item_data[, weight := 1L]
ctrl@weight_name <- "weight"
}
item_data[, c("def") := lapply(.SD, calc_design_effects),
.SDcols = ctrl@weight_name,
by = c(ctrl@geo_name, ctrl@group_names, ctrl@time_name)]

# get mean ystar
item_data[, c("adj_weight") := get(ctrl@weight_name) / get("n_responses")]
item_means <- item_data[, lapply(.SD, function(x) weighted.mean(x, .SD$adj_weight, na.rm = TRUE)),
.SDcols = c(gt_names, "adj_weight"),
by = c(ctrl@geo_name, ctrl@group_names, ctrl@time_name)]
# append _mean to the mean response columns
item_mean_vars <- paste0(gt_names, "_mean")
names(item_means) <- replace(names(item_means), match(gt_names, names(item_means)), item_mean_vars)
data.table::setkeyv(item_means, c(ctrl@time_name, ctrl@geo_name, ctrl@group_names))
drop_cols <- setdiff(names(item_means), c(key(item_means), item_mean_vars))
if (length(drop_cols)) {
item_means[, c(drop_cols) := NULL]
}
# get design-effect-adjusted nonmissing response counts by group and item
item_n <- item_data[, lapply(.SD, count_items, get("n_responses"), get("def")),
.SDcols = c(gt_names),
by = c(ctrl@geo_name, ctrl@group_names, ctrl@time_name)]
# append _n_grp to the response count columns
item_n_vars <- paste0(gt_names, "_n_grp")
names(item_n) <- replace(names(item_n), match(gt_names, names(item_n)), item_n_vars)
data.table::setkeyv(item_n, c(ctrl@time_name, ctrl@geo_name, ctrl@group_names))
drop_cols <- setdiff(names(item_n), c(key(item_n), item_n_vars))
if (length(drop_cols)) {
item_n[, c(drop_cols) := NULL]
}

# get mean ystar
item_data[, c("adj_weight") := get(ctrl@weight_name) / get("n_responses")]
item_means <- item_data[, lapply(.SD, function(x) weighted.mean(x, .SD$adj_weight, na.rm = TRUE)),
.SDcols = c(gt_names, "adj_weight"),
by = c(ctrl@geo_name, ctrl@group_names, ctrl@time_name)]
# append _mean to the mean response columns
item_mean_vars <- paste0(gt_names, "_mean")
names(item_means) <- replace(names(item_means), match(gt_names, names(item_means)), item_mean_vars)
data.table::setkeyv(item_means, c(ctrl@time_name, ctrl@geo_name, ctrl@group_names))
drop_cols <- setdiff(names(item_means), c(key(item_means), item_mean_vars))
if (length(drop_cols)) {
item_means[, c(drop_cols) := NULL]
}

# join response counts with means
count_means <- item_n[item_means]
count_means <- count_means[, c(ctrl@time_name, ctrl@geo_name,
ctrl@group_names, item_mean_vars,
item_n_vars), with = FALSE]

# join response counts with means
count_means <- item_n[item_means]
count_means <- count_means[, c(ctrl@time_name, ctrl@geo_name,
ctrl@group_names, item_mean_vars,
item_n_vars), with = FALSE]

# the group success count for an item is the product of its count and mean
item_s_vars <- paste0(gt_names, "_s_grp")
count_means[, c(item_s_vars) := round(count_means[, (item_mean_vars), with = FALSE] *
count_means[, (item_n_vars), with = FALSE], 0)]
count_means <- count_means[, -grep("_mean$", names(count_means)), with = FALSE]


# we want a long table of successes (s_grp) and trials (n_grp) by group and
# item; items need to move from columns to rows
melted <- melt(count_means, id.vars = c(ctrl@time_name, ctrl@geo_name,
ctrl@group_names),
variable.name = "item")
melted[, c("variable") := list(gsub(".*([sn]_grp)$", "\\1", get("item")))]
melted[, c("item") := list(gsub("(.*)_[sn]_grp$", "\\1", get("item")))]
f <- as.formula(paste0(paste(ctrl@time_name, ctrl@geo_name,
paste(ctrl@group_names, collapse = " + "),
"item", sep = "+"), " ~ variable"))
counts <- data.table::dcast.data.table(melted, f, drop = FALSE, fill = 0L)
# the group success count for an item is the product of its count and mean
item_s_vars <- paste0(gt_names, "_s_grp")
count_means[, c(item_s_vars) := round(count_means[, (item_mean_vars), with = FALSE] *
count_means[, (item_n_vars), with = FALSE], 0)]
count_means <- count_means[, -grep("_mean$", names(count_means)), with = FALSE]


# we want a long table of successes (s_grp) and trials (n_grp) by group and
# item; items need to move from columns to rows
melted <- melt(count_means, id.vars = c(ctrl@time_name, ctrl@geo_name,
ctrl@group_names),
variable.name = "item")
melted[, c("variable") := list(gsub(".*([sn]_grp)$", "\\1", get("item")))]
melted[, c("item") := list(gsub("(.*)_[sn]_grp$", "\\1", get("item")))]
f <- as.formula(paste0(paste(ctrl@time_name, ctrl@geo_name,
paste(ctrl@group_names, collapse = " + "),
"item", sep = "+"), " ~ variable"))
counts <- data.table::dcast.data.table(melted, f, drop = FALSE, fill = 0L)
}

# include aggregates, if any
if (length(aggregate_data) && nrow(aggregate_data) > 0) {
if (length(item_data) && length(aggregate_data) && nrow(aggregate_data) > 0) {
# invariant: we have both individual- and aggregate-level item responses
counts <- data.table::rbindlist(list(counts, aggregate_data), use.names =
TRUE)
message("Added ", length(ctrl@aggregate_item_names), " items from aggregate data.")
data.table::setkeyv(counts, c(ctrl@time_name, "item", ctrl@group_names,
ctrl@geo_name))
} else if (length(aggregate_data) && nrow(aggregate_data) > 0) {
# invariant: we have only aggregate-level item responses
# aggregate_data is already in the expected format
counts <- aggregate_data
message("Using ", length(ctrl@aggregate_item_names), " items from aggregate data.")
} else if (!length(item_data)) {
# invariant: we unexpectedly have neither individual- nor aggregate-level data
stop("can't proceed with neither item_data nor aggregate_data")
}

data.table::setkeyv(counts, c(ctrl@time_name, "item", ctrl@group_names,
ctrl@geo_name))

# include unobserved cells
all_groups = expand.grid(c(setNames(list(unique(counts[[ctrl@geo_name]])), ctrl@geo_name),
setNames(list(ctrl@time_filter), ctrl@time_name),
Expand Down
2 changes: 1 addition & 1 deletion R/assertions.r
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ has_all_names <- function(table, names, suggestion = NULL) {
}

assertthat::on_failure(has_all_names) <- function(call, env) {
paste0("not all ", call$names, " are names in ", deparse(call$table))
paste0("not all of ", deparse(call$names), " are names in ", deparse(call$table))
}

all_strings <- function(x) {
Expand Down
20 changes: 15 additions & 5 deletions R/class-control.r
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
setClass("Control",
slots = list(# item data
item_names = "character",
slots = list(item_names = "ANY",
time_name = "character",
geo_name = "character",
group_names = "ANY",
Expand All @@ -22,13 +21,21 @@ setClass("Control",
weight_name = "ANY",
proportion_name = "character",
rake_names = "character",
max_raked_weight = "ANY",
# modeling options
constant_item = "logical"),
constant_item = "logical",
# indicators for state
has_individual_data = "ANY",
has_aggregate_data = "ANY",
has_target_data = "ANY",
has_modifier_data = "ANY"),
validity = function(object) {
if (!length(object@time_name) == 1L)
"\"time_name\" should be a single variable name"
else if (!length(object@geo_name) == 1L)
"\"geo_name\" should be a single variable name"
else if (length(object@item_names) && !is.character(object@item_names))
"if specified \"item_names\" should give variable names in a character vector"
else if (length(object@survey_name) && length(object@survey_name) != 1L)
"if specified \"survey_name\" should be a single variable name"
else if (length(object@survey_name) && !is.character(object@survey_name))
Expand Down Expand Up @@ -64,14 +71,17 @@ setClass("Control",
else if (!length(object@constant_item) == 1L &&
is.logical(object@constant_item))
"\"constant_item\" should be a single logical value"
# else if (length(unique(object@time_filter)) == 1L)
# "if specified \"time_filter\" should give at least two time periods"
else if (length(unique(object@geo_filter)) == 1L)
"if specified \"geo_filter\" should give at least two local geographic areas"
else if (length(object@min_survey_filter) != 1L || object@min_survey_filter <= 0L)
"\"min_survey_filter\" should be a positive integer"
else if (!length(object@min_t_filter) == 1L && object@min_t_filter > 0L)
"\"min_t_filter\" should be a positive integer"
else if (length(object@max_raked_weight) &&
(length(object@max_raked_weight) > 1 |
!is.numeric(object@max_raked_weight))) {
"if specified \"max_raked_weight\" should be a single number"
}
else
TRUE
})
Loading

0 comments on commit 367ade3

Please sign in to comment.