|
| 1 | +#' Overwrites the get.incidence function for obkData objects to support doBy |
| 2 | +#' |
| 3 | +#' Creates different time series based on the list of factor variables. |
| 4 | +#' This function should eventually migrate back into the OutbreakTools package. |
| 5 | +#' |
| 6 | +#' @author Michael Höhle |
| 7 | +#' @export |
| 8 | +setGeneric("get.incidence2", function(x, ...) standardGeneric("get.incidence2")) |
| 9 | + |
| 10 | +#################### |
| 11 | +## obkData method ## |
| 12 | +#################### |
| 13 | +## |
| 14 | +## based on 'dates' associated to a given field |
| 15 | +## 'values' are optional and can be used to subset the retained 'dates' |
| 16 | +## (e.g. define what a positive case is) |
| 17 | + |
| 18 | +#' More powerful get.incidence method for obkData |
| 19 | +#' |
| 20 | +#' @export |
| 21 | +setMethod("get.incidence2", "obkData", function(x, data, where=NULL, val.min=NULL, val.max=NULL, val.kept=NULL, regexp=NULL, |
| 22 | + from=NULL, to=NULL, interval=1, add.zero=TRUE, doBy=NULL, ...){ |
| 23 | + ## HANDLE ARGUMENTS ## |
| 24 | + if(is.null(val.min)) val.min <- -Inf |
| 25 | + if(is.null(val.max)) val.max <- Inf |
| 26 | + |
| 27 | + |
| 28 | + ## GET DATA ## |
| 29 | + df <- get.data(x, data=data, where=where, showSource=TRUE) |
| 30 | + if(is.null(df)) stop(paste("Data",data,"cannot be found in this obkData object")) |
| 31 | + |
| 32 | + ## call specific procedures if applicable ## |
| 33 | + if(inherits(df, c("obkSequences", "obkContacts"))) { |
| 34 | + return(get.incidence(df, from=from, to=to, |
| 35 | + interval=interval, add.zero=add.zero)) |
| 36 | + } |
| 37 | + |
| 38 | + |
| 39 | + ## OTHERWISE: DATA ASSUMED TAKEN FROM RECORDS ## |
| 40 | + ## if data=='records', keep the first data.frame of the list ## |
| 41 | + if(is.list(df) && !is.data.frame(df) && is.data.frame(df[[1]])) df <- df[[1]] |
| 42 | + |
| 43 | + ## get dates ## |
| 44 | + if(!"date" %in% names(df)) stop("no date in the data") |
| 45 | + dates <- df$date |
| 46 | + |
| 47 | + ## get optional values associated to the dates ## |
| 48 | + ## keep 'data' if it is there |
| 49 | + if(data %in% names(df)){ |
| 50 | + values <- df[[data]] |
| 51 | + } else { ## else keep first optional field |
| 52 | + temp <- !names(df) %in% c("individualID","date") # fields being not "individualID" or "date" |
| 53 | + if(any(temp)) { |
| 54 | + values <- df[,min(which(temp))] |
| 55 | + } else { |
| 56 | + values <- NULL |
| 57 | + } |
| 58 | + } |
| 59 | + |
| 60 | + |
| 61 | + ## EXTRACT RELEVANT DATES ## |
| 62 | + if(!is.null(values)){ |
| 63 | + toKeep <- rep(TRUE, length(values)) |
| 64 | + |
| 65 | + ## if 'values' is numeric ## |
| 66 | + if(is.numeric(values)){ |
| 67 | + toKeep <- toKeep & (values>=val.min & values<=val.max) |
| 68 | + } |
| 69 | + |
| 70 | + ## if val.kept is provided ## |
| 71 | + if(!is.null(val.kept)) { |
| 72 | + toKeep <- toKeep & (values %in% val.kept) |
| 73 | + } |
| 74 | + |
| 75 | + ## if regexp is provided ## |
| 76 | + if(!is.null(regexp)) { |
| 77 | + temp <- rep(FALSE, length(values)) |
| 78 | + temp[grep(regexp, values, ...)] <- TRUE |
| 79 | + toKeep <- toKeep & temp |
| 80 | + } |
| 81 | + |
| 82 | + dates <- dates[toKeep] |
| 83 | + } |
| 84 | + |
| 85 | + ##If there are no dates we are done. |
| 86 | + if(length(dates)==0) return(NULL) |
| 87 | + |
| 88 | + ##Prepare the return list |
| 89 | + res <- list() |
| 90 | + |
| 91 | + #If there is no from-to specification make |
| 92 | + #sure it's not data subset dependend, but is the |
| 93 | + #same for each subset. |
| 94 | + if (is.null(from) & is.null(to)) { |
| 95 | + from <- min(dates) |
| 96 | + to <- max(dates) |
| 97 | + } |
| 98 | + |
| 99 | + ##Loop over all variables in doBy |
| 100 | + if (!is.null(doBy)) { |
| 101 | + for (i in seq_len(length(doBy))) { |
| 102 | + # browser() |
| 103 | + theData <- get.data(x, data=doBy[[i]], showSource=TRUE) |
| 104 | + |
| 105 | + if (is.null(theData)) stop(paste0("Data for ",doBy[[i]]," cannot be found in this obkData object.")) |
| 106 | + if (!is.factor(theData[,doBy[[i]]])) stop("The variable ",doBy[[i]]," is not a factor.") |
| 107 | + |
| 108 | + res[[doBy[i]]] <- tapply(dates, INDEX=theData[,doBy[[i]]], FUN=get.incidence, from=from, to=to, interval=interval, add.zero=add.zero,simplify=FALSE) |
| 109 | + } |
| 110 | + } else { |
| 111 | + res <- list(get.incidence(dates, from=from, to=to, interval=interval, add.zero=add.zero)) |
| 112 | + } |
| 113 | + |
| 114 | + ## RETURN OUTPUT ## |
| 115 | + return(res) |
| 116 | +}) # end obkData method |
| 117 | + |
| 118 | +#' Helper function to format a get.incidence list of data.frames |
| 119 | +#' to a multivariate xts object |
| 120 | +#' |
| 121 | +#' @param incList List of lists containing the data.frames from get.incidence2 |
| 122 | +#' @return An xts object corresponding to the flattened incList |
| 123 | +#' @export |
| 124 | +inc2xts <- function(incList) { |
| 125 | + #Convert each entry of incList from data.frame to xts. It's a list of xts obj |
| 126 | + xtsList <- lapply(incList, function(list) { |
| 127 | + lapply(list, function(df) { |
| 128 | + with(df, as.xts(incidence, order.by=date)) |
| 129 | + }) |
| 130 | + }) |
| 131 | + |
| 132 | + #Code looping over all xts entries and merging them. data.table or plyr |
| 133 | + #might do this better? |
| 134 | + xts <- Reduce(cbind,lapply(xtsList, function(list) Reduce(cbind, list))) |
| 135 | + |
| 136 | + #Manual way of getting pretty (?) column names |
| 137 | + lvl1 <- names(xtsList) |
| 138 | + lvl2 <- lapply(xtsList, names) |
| 139 | + mynames <- paste(rep(lvl1,times=sapply(lvl2,length)), do.call(c,lvl2),sep="-") |
| 140 | + dimnames(xts)[[2]] <- mynames |
| 141 | + |
| 142 | + #Is there a better way?!?! |
| 143 | + |
| 144 | + #Sanity checks |
| 145 | + #all(xtsList[["SEX"]]$male == xts[,"SEX-male"]) |
| 146 | + #all(xtsList[["SEX"]]$female == xts[,"SEX-female"]) |
| 147 | + #all(xtsList[["AGEGRP"]][[1]] == xts[,"AGEGRP-(0,5]"]) |
| 148 | + |
| 149 | + #xts <- Reduce(cbind, Reduce(cbind, xtsList)) |
| 150 | + #do.call(cbind, xtsList) |
| 151 | + #data.table::rbindlist(xtsList) |
| 152 | + |
| 153 | + return(xts) |
| 154 | +} |
| 155 | + |
| 156 | +sandboxIt <- function() { |
| 157 | + source("getIncidence.R") |
| 158 | + |
| 159 | + #Add extra column |
| 160 | + hagelloch.obk@individuals$AGEGRP <- cut(hagelloch.obk@individuals$AGE, breaks=c(0,5,10,Inf)) |
| 161 | + |
| 162 | + |
| 163 | + inc <- get.incidence2(hagelloch.obk, "timeERU", doBy=c("SEX","CL"), add.zero=FALSE) |
| 164 | + |
| 165 | + #Show the time series. |
| 166 | + plot(inc2xts(inc)) |
| 167 | + plot(as.zoo(inc2xts(inc)),plot.type='multiple') |
| 168 | + plot(as.zoo(inc2xts(inc)), screens=1,col=c("magenta","steelblue"),lwd=3,type="h",cex.axis=0.8) |
| 169 | + |
| 170 | + plot(as.zoo(inc2xts(inci)), plot.type='multiple') |
| 171 | + |
| 172 | +} |
0 commit comments